1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 /* max size of subsets */ 36 mss = 0; 37 for (i=0;i<sub_schurs->n_subs;i++) { 38 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 39 PetscInt subset_size; 40 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 41 mss = PetscMax(mss,subset_size); 42 } 43 } 44 45 /* min/max and threshold */ 46 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 47 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 48 nmax = PetscMax(nmin,nmax); 49 if (pcbddc->adaptive_threshold > 1.0) { 50 thresh = 1.0/pcbddc->adaptive_threshold; 51 } else { 52 thresh = 1.0; 53 } 54 55 /* allocate lapack workspace */ 56 cum = cum2 = 0; 57 maxneigs = 0; 58 for (i=0;i<sub_schurs->n_subs;i++) { 59 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 60 PetscInt n,subset_size; 61 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 62 n = PetscMin(subset_size,nmax); 63 cum += subset_size*n; 64 cum2 += n; 65 maxneigs = PetscMax(maxneigs,n); 66 } 67 } 68 69 if (mss) { 70 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 71 PetscBLASInt B_itype = 1; 72 PetscBLASInt B_N = mss; 73 PetscScalar zero = 0.0; 74 PetscScalar eps = 0.0; /* dlamch? */ 75 76 B_lwork = -1; 77 S = NULL; 78 St = NULL; 79 eigs = NULL; 80 eigv = NULL; 81 B_iwork = NULL; 82 B_ifail = NULL; 83 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 84 #if defined(PETSC_USE_COMPLEX) 85 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 86 #else 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 88 #endif 89 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 90 ierr = PetscFPTrapPop();CHKERRQ(ierr); 91 } else { 92 /* TODO */ 93 } 94 } else { 95 lwork = 0; 96 } 97 98 nv = 0; 99 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 100 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 101 } 102 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 103 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 104 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 105 #if defined(PETSC_USE_COMPLEX) 106 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 107 #endif 108 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 109 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 110 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 111 nv+cum,&pcbddc->adaptive_constraints_idxs, 112 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 113 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 114 115 maxneigs = 0; 116 cum = cum2 = cumarray = 0; 117 if (sub_schurs->is_Ej_com) { 118 const PetscInt *idxs; 119 120 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 121 for (cum=0;cum<nv;cum++) { 122 pcbddc->adaptive_constraints_n[cum] = 1; 123 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 124 pcbddc->adaptive_constraints_ptrs[cum] = cum; 125 pcbddc->adaptive_constraints_data[cum] = 1.0; 126 } 127 cum2 = cum; 128 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 } 130 131 if (mss) { /* multilevel */ 132 if (pcbddc->use_deluxe_scaling) { 133 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 134 } else { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 136 } 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 138 } 139 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (sub_schurs->is_hermitian) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 PetscInt nmin_s; 173 174 /* ask for eigenvalues lower than thresh */ 175 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 176 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 178 #if defined(PETSC_USE_COMPLEX) 179 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 180 #else 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 182 #endif 183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 184 if (B_ierr) { 185 if (B_ierr < 0 ) { 186 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 187 } else if (B_ierr <= B_N) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 189 } else { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 191 } 192 } 193 194 if (B_neigs > nmax) { 195 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 196 B_neigs = nmax; 197 } 198 199 nmin_s = PetscMin(nmin,B_N); 200 if (B_neigs < nmin_s) { 201 PetscBLASInt B_neigs2; 202 203 B_IL = B_neigs + 1; 204 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 205 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 206 if (sub_schurs->is_hermitian) { 207 PetscInt j; 208 for (j=0;j<subset_size;j++) { 209 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 for (j=0;j<subset_size;j++) { 212 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 213 } 214 } else { 215 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 216 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 } 218 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 219 #if defined(PETSC_USE_COMPLEX) 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 221 #else 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 223 #endif 224 ierr = PetscFPTrapPop();CHKERRQ(ierr); 225 B_neigs += B_neigs2; 226 } 227 if (B_ierr) { 228 if (B_ierr < 0 ) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 230 } else if (B_ierr <= B_N) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 232 } else { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 234 } 235 } 236 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 237 for (j=0;j<B_neigs;j++) { 238 if (eigs[j] == 0.0) { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 240 } else { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 242 } 243 } 244 } else { 245 /* TODO */ 246 } 247 maxneigs = PetscMax(B_neigs,maxneigs); 248 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 249 250 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 251 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 252 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 #if 0 254 PetscInt ii; 255 for (ii=0;ii<B_neigs;ii++) { 256 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 257 for (j=0;j<B_N;j++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 259 } 260 } 261 #endif 262 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 263 for (j=0;j<B_neigs;j++) { 264 #if 0 265 { 266 PetscBLASInt Blas_N,Blas_one = 1.0; 267 PetscScalar norm; 268 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 269 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 270 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 271 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 272 } else { 273 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } 275 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 276 } 277 #endif 278 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 279 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 280 cum2 += subset_size; 281 } 282 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 283 } 284 /* shift for next computation */ 285 cumarray += subset_size*subset_size; 286 } 287 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 288 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 289 290 if (mss) { 291 if (pcbddc->use_deluxe_scaling) { 292 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 293 } else { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 295 } 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 297 } 298 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 299 #if defined(PETSC_USE_COMPLEX) 300 ierr = PetscFree(rwork);CHKERRQ(ierr); 301 #endif 302 if (pcbddc->dbg_flag) { 303 PetscInt maxneigs_r; 304 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 305 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 306 } 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "PCBDDCSetUpSolvers" 312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 313 { 314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 315 PetscScalar *coarse_submat_vals; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 322 323 /* Setup local neumann solver ksp_R */ 324 /* PCBDDCSetUpLocalScatters should be called first! */ 325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 326 327 /* Change global null space passed in by the user if change of basis has been requested */ 328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 330 } 331 332 /* 333 Setup local correction and local part of coarse basis. 334 Gives back the dense local part of the coarse matrix in column major ordering 335 */ 336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 337 338 /* Compute total number of coarse nodes and setup coarse solver */ 339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 340 341 /* free */ 342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCResetCustomization" 348 PetscErrorCode PCBDDCResetCustomization(PC pc) 349 { 350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 356 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 358 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 360 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 361 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 362 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 363 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "PCBDDCResetTopography" 370 PetscErrorCode PCBDDCResetTopography(PC pc) 371 { 372 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 377 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 379 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 380 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCBDDCResetSolvers" 386 PetscErrorCode PCBDDCResetSolvers(PC pc) 387 { 388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 389 PetscScalar *array; 390 PetscErrorCode ierr; 391 392 PetscFunctionBegin; 393 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 394 if (pcbddc->coarse_phi_B) { 395 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 396 ierr = PetscFree(array);CHKERRQ(ierr); 397 } 398 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 402 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 404 if (pcbddc->local_auxmat2) { 405 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 406 ierr = PetscFree(array);CHKERRQ(ierr); 407 } 408 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 409 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 410 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 412 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 413 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 416 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 420 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 422 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 429 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 430 { 431 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 432 PC_IS *pcis = (PC_IS*)pc->data; 433 VecType impVecType; 434 PetscInt n_constraints,n_R,old_size; 435 PetscErrorCode ierr; 436 437 PetscFunctionBegin; 438 if (!pcbddc->ConstraintMatrix) { 439 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 440 } 441 /* get sizes */ 442 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 443 n_R = pcis->n-pcbddc->n_actual_vertices; 444 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 445 /* local work vectors (try to avoid unneeded work)*/ 446 /* R nodes */ 447 old_size = -1; 448 if (pcbddc->vec1_R) { 449 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 450 } 451 if (n_R != old_size) { 452 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 453 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 454 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 455 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 456 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 457 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 458 } 459 /* local primal dofs */ 460 old_size = -1; 461 if (pcbddc->vec1_P) { 462 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 463 } 464 if (pcbddc->local_primal_size != old_size) { 465 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 466 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 468 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 469 } 470 /* local explicit constraints */ 471 old_size = -1; 472 if (pcbddc->vec1_C) { 473 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 474 } 475 if (n_constraints && n_constraints != old_size) { 476 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 477 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 479 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "PCBDDCSetUpCorrection" 486 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 487 { 488 PetscErrorCode ierr; 489 /* pointers to pcis and pcbddc */ 490 PC_IS* pcis = (PC_IS*)pc->data; 491 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 492 /* submatrices of local problem */ 493 Mat A_RV,A_VR,A_VV; 494 /* submatrices of local coarse problem */ 495 Mat S_VV,S_CV,S_VC,S_CC; 496 /* working matrices */ 497 Mat C_CR; 498 /* additional working stuff */ 499 PC pc_R; 500 Mat F; 501 PetscBool isLU,isCHOL,isILU; 502 503 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 504 PetscScalar *work; 505 PetscInt *idx_V_B; 506 PetscInt n,n_vertices,n_constraints; 507 PetscInt i,n_R,n_D,n_B; 508 PetscBool unsymmetric_check; 509 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 510 MatType impMatType; 511 /* some shortcuts to scalars */ 512 PetscScalar one=1.0,m_one=-1.0; 513 514 PetscFunctionBegin; 515 /* get number of vertices (corners plus constraints with change of basis) 516 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 517 n_vertices = pcbddc->n_actual_vertices; 518 n_constraints = pcbddc->local_primal_size-n_vertices; 519 /* Set Non-overlapping dimensions */ 520 n_B = pcis->n_B; n_D = pcis->n - n_B; 521 n_R = pcis->n-n_vertices; 522 523 /* Set types for local objects needed by BDDC precondtioner */ 524 impMatType = MATSEQDENSE; 525 526 /* vertices in boundary numbering */ 527 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 528 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 529 if (i != n_vertices) { 530 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 531 } 532 533 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 534 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 535 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 536 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 538 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 540 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 541 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 542 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 543 544 unsymmetric_check = PETSC_FALSE; 545 /* allocate workspace */ 546 n = 0; 547 if (n_constraints) { 548 n += n_R*n_constraints; 549 } 550 if (n_vertices) { 551 n = PetscMax(2*n_R*n_vertices,n); 552 } 553 if (!pcbddc->issym) { 554 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 555 unsymmetric_check = PETSC_TRUE; 556 } 557 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 558 559 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 560 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 561 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 564 if (isLU || isILU || isCHOL) { 565 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 566 } else { 567 F = NULL; 568 } 569 570 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 571 if (n_constraints) { 572 Mat M1,M2,M3; 573 IS is_aux; 574 /* see if we can save some allocations */ 575 if (pcbddc->local_auxmat2) { 576 PetscInt on_R,on_constraints; 577 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 578 if (on_R != n_R || on_constraints != n_constraints) { 579 PetscScalar *marray; 580 581 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 582 ierr = PetscFree(marray);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 585 } 586 } 587 /* auxiliary matrices */ 588 if (!pcbddc->local_auxmat2) { 589 PetscScalar *marray; 590 591 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 593 marray += n_R*n_constraints; 594 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 595 } 596 597 /* Extract constraints on R nodes: C_{CR} */ 598 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 599 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 600 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 601 602 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 603 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 604 for (i=0;i<n_constraints;i++) { 605 const PetscScalar *row_cmat_values; 606 const PetscInt *row_cmat_indices; 607 PetscInt size_of_constraint,j; 608 609 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 610 for (j=0;j<size_of_constraint;j++) { 611 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 612 } 613 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 614 } 615 if (F) { 616 Mat B; 617 618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 619 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 620 ierr = MatDestroy(&B);CHKERRQ(ierr); 621 } else { 622 PetscScalar *xarray; 623 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 624 for (i=0;i<n_constraints;i++) { 625 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 626 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 627 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 628 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 630 } 631 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 632 } 633 634 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 635 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 636 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 637 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 639 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 640 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 641 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 642 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 643 ierr = MatDestroy(&M2);CHKERRQ(ierr); 644 ierr = MatDestroy(&M3);CHKERRQ(ierr); 645 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 646 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 647 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 648 ierr = MatDestroy(&M1);CHKERRQ(ierr); 649 } 650 /* Get submatrices from subdomain matrix */ 651 if (n_vertices) { 652 Mat newmat; 653 IS is_aux; 654 PetscInt ibs,mbs; 655 PetscBool issbaij; 656 657 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 658 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 659 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 660 if (ibs != mbs) { /* need to convert to SEQAIJ */ 661 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 662 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 665 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 666 } else { 667 /* this is safe */ 668 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 669 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 670 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 671 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 672 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 673 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 674 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 675 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 676 } else { 677 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 678 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 679 } 680 } 681 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 682 } 683 684 /* Matrix of coarse basis functions (local) */ 685 if (pcbddc->coarse_phi_B) { 686 PetscInt on_B,on_primal,on_D=n_D; 687 if (pcbddc->coarse_phi_D) { 688 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 689 } 690 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 691 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 692 PetscScalar *marray; 693 694 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 695 ierr = PetscFree(marray);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 700 } 701 } 702 703 if (!pcbddc->coarse_phi_B) { 704 PetscScalar *marray; 705 706 n = n_B*pcbddc->local_primal_size; 707 if (pcbddc->switch_static || pcbddc->dbg_flag) { 708 n += n_D*pcbddc->local_primal_size; 709 } 710 if (!pcbddc->issym) { 711 n *= 2; 712 } 713 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 715 n = n_B*pcbddc->local_primal_size; 716 if (pcbddc->switch_static || pcbddc->dbg_flag) { 717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 718 n += n_D*pcbddc->local_primal_size; 719 } 720 if (!pcbddc->issym) { 721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 if (pcbddc->switch_static || pcbddc->dbg_flag) { 723 n = n_B*pcbddc->local_primal_size; 724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 725 } 726 } else { 727 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 728 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 729 if (pcbddc->switch_static || pcbddc->dbg_flag) { 730 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 731 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 732 } 733 } 734 } 735 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 736 /* vertices */ 737 if (n_vertices) { 738 739 if (n_R) { 740 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 741 PetscBLASInt B_N,B_one = 1; 742 PetscScalar *x,*y; 743 744 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 746 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 747 if (F) { 748 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 749 } else { 750 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 751 for (i=0;i<n_vertices;i++) { 752 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 753 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 754 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 757 } 758 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 759 } 760 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 761 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 762 if (n_constraints) { 763 Mat B; 764 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 766 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 767 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 768 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 769 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 770 ierr = MatDestroy(&B);CHKERRQ(ierr); 771 } 772 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 773 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 774 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 775 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 778 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 779 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 781 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 782 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 783 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 784 } else { 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 787 } 788 /* coarse basis functions */ 789 for (i=0;i<n_vertices;i++) { 790 PetscScalar *y; 791 792 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 793 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 794 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 795 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 y[n_B*i+idx_V_B[i]] = 1.0; 798 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 799 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 800 801 if (pcbddc->switch_static || pcbddc->dbg_flag) { 802 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 803 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 804 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 807 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 808 } 809 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 810 } 811 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 812 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 813 } 814 815 if (n_constraints) { 816 Mat B; 817 818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 819 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 820 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 821 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 822 if (n_vertices) { 823 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 /* coarse basis functions */ 827 for (i=0;i<n_constraints;i++) { 828 PetscScalar *y; 829 830 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 831 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 832 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 833 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 840 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 841 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 845 } 846 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 847 } 848 } 849 850 /* compute other basis functions for non-symmetric problems */ 851 if (!pcbddc->issym) { 852 Mat B,X; 853 854 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 855 856 if (n_constraints) { 857 Mat S_CCT,B_C; 858 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 860 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 861 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 862 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 863 if (n_vertices) { 864 Mat B_V,S_VCT; 865 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 867 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 868 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 869 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 871 } 872 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 873 } 874 if (n_vertices && n_R) { 875 Mat A_VRT; 876 PetscBLASInt B_N,B_one = 1; 877 878 if (!n_constraints) { /* if there are no constraints, reset work */ 879 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 882 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 883 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 884 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 885 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 886 } 887 888 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 889 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 890 for (i=0;i<pcbddc->local_primal_size;i++) { 891 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 892 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 893 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 896 } 897 } else { 898 for (i=0;i<pcbddc->local_primal_size;i++) { 899 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 901 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 902 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<pcbddc->local_primal_size;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 if (i<n_vertices) { 917 y[n_B*i+idx_V_B[i]] = 1.0; 918 } 919 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 920 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 921 922 if (pcbddc->switch_static || pcbddc->dbg_flag) { 923 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 924 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 925 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 928 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 929 } 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 } 932 ierr = MatDestroy(&X);CHKERRQ(ierr); 933 } 934 935 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 936 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 940 /* Checking coarse_sub_mat and coarse basis functios */ 941 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 942 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 if (pcbddc->dbg_flag) { 944 Mat coarse_sub_mat; 945 Mat AUXMAT,TM1,TM2,TM3,TM4; 946 Mat coarse_phi_D,coarse_phi_B; 947 Mat coarse_psi_D,coarse_psi_B; 948 Mat A_II,A_BB,A_IB,A_BI; 949 MatType checkmattype=MATSEQAIJ; 950 PetscReal real_value; 951 952 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 953 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 956 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 958 if (unsymmetric_check) { 959 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 960 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 961 } 962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 963 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 if (unsymmetric_check) { 968 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 969 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 970 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 971 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 972 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 973 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 974 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 975 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 976 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 977 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 978 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 979 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 980 } else { 981 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 982 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 983 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 984 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 985 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 986 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 987 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 988 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 989 } 990 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 991 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 994 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 995 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 996 997 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1000 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1004 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1008 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1010 if (unsymmetric_check) { 1011 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1012 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1013 } 1014 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1015 } 1016 1017 /* free memory */ 1018 ierr = PetscFree(work);CHKERRQ(ierr); 1019 if (n_vertices) { 1020 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1021 } 1022 if (n_constraints) { 1023 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1024 } 1025 /* get back data */ 1026 *coarse_submat_vals_n = coarse_submat_vals; 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1033 { 1034 Mat *work_mat; 1035 IS isrow_s,iscol_s; 1036 PetscBool rsorted,csorted; 1037 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1042 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1043 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1045 1046 if (!rsorted) { 1047 const PetscInt *idxs; 1048 PetscInt *idxs_sorted,i; 1049 1050 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1052 for (i=0;i<rsize;i++) { 1053 idxs_perm_r[i] = i; 1054 } 1055 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1056 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1057 for (i=0;i<rsize;i++) { 1058 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1059 } 1060 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1061 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1062 } else { 1063 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1064 isrow_s = isrow; 1065 } 1066 1067 if (!csorted) { 1068 if (isrow == iscol) { 1069 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1070 iscol_s = isrow_s; 1071 } else { 1072 const PetscInt *idxs; 1073 PetscInt *idxs_sorted,i; 1074 1075 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1077 for (i=0;i<csize;i++) { 1078 idxs_perm_c[i] = i; 1079 } 1080 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1081 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1082 for (i=0;i<csize;i++) { 1083 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1084 } 1085 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1087 } 1088 } else { 1089 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1090 iscol_s = iscol; 1091 } 1092 1093 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1094 1095 if (!rsorted || !csorted) { 1096 Mat new_mat; 1097 IS is_perm_r,is_perm_c; 1098 1099 if (!rsorted) { 1100 PetscInt *idxs_r,i; 1101 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1102 for (i=0;i<rsize;i++) { 1103 idxs_r[idxs_perm_r[i]] = i; 1104 } 1105 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1107 } else { 1108 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1109 } 1110 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1111 1112 if (!csorted) { 1113 if (isrow_s == iscol_s) { 1114 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1115 is_perm_c = is_perm_r; 1116 } else { 1117 PetscInt *idxs_c,i; 1118 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_c[idxs_perm_c[i]] = i; 1121 } 1122 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1127 } 1128 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1129 1130 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1131 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1132 work_mat[0] = new_mat; 1133 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1134 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1135 } 1136 1137 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1138 *B = work_mat[0]; 1139 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1140 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1141 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1148 { 1149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1150 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1151 Mat new_mat; 1152 IS is_local,is_global; 1153 PetscInt local_size; 1154 PetscBool isseqaij; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1159 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1160 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1161 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1162 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1163 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1164 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1165 1166 /* check */ 1167 if (pcbddc->dbg_flag) { 1168 Vec x,x_change; 1169 PetscReal error; 1170 1171 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1172 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1174 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1177 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1180 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1183 ierr = VecDestroy(&x);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1185 } 1186 1187 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1188 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1189 if (isseqaij) { 1190 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1191 } else { 1192 Mat work_mat; 1193 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1194 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1195 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1196 } 1197 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1198 /* 1199 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1200 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1201 */ 1202 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1209 { 1210 PC_IS* pcis = (PC_IS*)(pc->data); 1211 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1212 IS is_aux1,is_aux2; 1213 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1214 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1215 PetscInt vbs,bs; 1216 PetscBT bitmask; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 /* 1221 No need to setup local scatters if 1222 - primal space is unchanged 1223 AND 1224 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1225 AND 1226 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1227 */ 1228 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1229 PetscFunctionReturn(0); 1230 } 1231 /* destroy old objects */ 1232 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1233 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1235 /* Set Non-overlapping dimensions */ 1236 n_B = pcis->n_B; n_D = pcis->n - n_B; 1237 n_vertices = pcbddc->n_actual_vertices; 1238 /* create auxiliary bitmask */ 1239 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1240 for (i=0;i<n_vertices;i++) { 1241 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1242 } 1243 1244 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1245 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1246 for (i=0, n_R=0; i<pcis->n; i++) { 1247 if (!PetscBTLookup(bitmask,i)) { 1248 idx_R_local[n_R] = i; 1249 n_R++; 1250 } 1251 } 1252 1253 /* Block code */ 1254 vbs = 1; 1255 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1256 if (bs>1 && !(n_vertices%bs)) { 1257 PetscBool is_blocked = PETSC_TRUE; 1258 PetscInt *vary; 1259 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1260 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1261 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1262 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1263 for (i=0; i<n_vertices; i++) { 1264 if (vary[i]!=0 && vary[i]!=bs) { 1265 is_blocked = PETSC_FALSE; 1266 break; 1267 } 1268 } 1269 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1270 vbs = bs; 1271 for (i=0;i<n_R/vbs;i++) { 1272 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1273 } 1274 } 1275 ierr = PetscFree(vary);CHKERRQ(ierr); 1276 } 1277 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1278 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1279 1280 /* print some info if requested */ 1281 if (pcbddc->dbg_flag) { 1282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1289 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1290 } 1291 1292 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1293 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1296 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1297 for (i=0; i<n_D; i++) { 1298 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1299 } 1300 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1301 for (i=0, j=0; i<n_R; i++) { 1302 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1303 aux_array1[j++] = i; 1304 } 1305 } 1306 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1307 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1308 for (i=0, j=0; i<n_B; i++) { 1309 if (!PetscBTLookup(bitmask,is_indices[i])) { 1310 aux_array2[j++] = i; 1311 } 1312 } 1313 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1314 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1315 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1318 1319 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1320 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1321 for (i=0, j=0; i<n_R; i++) { 1322 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1323 aux_array1[j++] = i; 1324 } 1325 } 1326 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1327 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1328 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1339 { 1340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1341 PC_IS *pcis = (PC_IS*)pc->data; 1342 PC pc_temp; 1343 Mat A_RR; 1344 MatReuse reuse; 1345 PetscScalar m_one = -1.0; 1346 PetscReal value; 1347 PetscInt n_D,n_R,ibs,mbs; 1348 PetscBool use_exact,use_exact_reduced,issbaij; 1349 PetscErrorCode ierr; 1350 /* prefixes stuff */ 1351 char dir_prefix[256],neu_prefix[256],str_level[16]; 1352 size_t len; 1353 1354 PetscFunctionBegin; 1355 1356 /* compute prefixes */ 1357 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1358 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1359 if (!pcbddc->current_level) { 1360 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1361 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1363 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1364 } else { 1365 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1366 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1367 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1368 len -= 15; /* remove "pc_bddc_coarse_" */ 1369 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1370 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1371 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1372 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1374 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1376 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1377 } 1378 1379 /* DIRICHLET PROBLEM */ 1380 if (dirichlet) { 1381 /* Matrix for Dirichlet problem is pcis->A_II */ 1382 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1383 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1384 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1385 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1386 /* default */ 1387 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1388 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1390 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1391 if (issbaij) { 1392 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1393 } else { 1394 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1395 } 1396 /* Allow user's customization */ 1397 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1398 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1399 } 1400 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1401 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1402 if (!n_D) { 1403 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1404 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1405 } 1406 /* Set Up KSP for Dirichlet problem of BDDC */ 1407 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1408 /* set ksp_D into pcis data */ 1409 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1410 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1411 pcis->ksp_D = pcbddc->ksp_D; 1412 } 1413 1414 /* NEUMANN PROBLEM */ 1415 A_RR = 0; 1416 if (neumann) { 1417 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1418 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1419 if (pcbddc->ksp_R) { /* already created ksp */ 1420 PetscInt nn_R; 1421 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1422 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1423 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1424 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1425 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1426 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1427 reuse = MAT_INITIAL_MATRIX; 1428 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1429 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1430 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1431 reuse = MAT_INITIAL_MATRIX; 1432 } else { /* safe to reuse the matrix */ 1433 reuse = MAT_REUSE_MATRIX; 1434 } 1435 } 1436 /* last check */ 1437 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1438 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1439 reuse = MAT_INITIAL_MATRIX; 1440 } 1441 } else { /* first time, so we need to create the matrix */ 1442 reuse = MAT_INITIAL_MATRIX; 1443 } 1444 /* extract A_RR */ 1445 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1446 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1447 if (ibs != mbs) { 1448 Mat newmat; 1449 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1450 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1451 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1454 } 1455 if (!pcbddc->ksp_R) { /* create object if not present */ 1456 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1457 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1458 /* default */ 1459 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1460 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1461 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1463 if (issbaij) { 1464 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1465 } else { 1466 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1467 } 1468 /* Allow user's customization */ 1469 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1470 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1471 } 1472 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1473 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1474 if (!n_R) { 1475 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1476 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1477 } 1478 /* Set Up KSP for Neumann problem of BDDC */ 1479 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1480 } 1481 1482 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1483 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1484 if (pcbddc->dbg_flag) { 1485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1486 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1488 } 1489 if (dirichlet) { /* Dirichlet */ 1490 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1491 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1492 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1495 /* need to be adapted? */ 1496 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1497 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1498 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1499 /* print info */ 1500 if (pcbddc->dbg_flag) { 1501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1503 } 1504 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1505 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1506 } 1507 } 1508 if (neumann) { /* Neumann */ 1509 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1510 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1511 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1514 /* need to be adapted? */ 1515 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1516 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1517 /* print info */ 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1521 } 1522 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1523 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1524 } 1525 } 1526 } 1527 /* free Neumann problem's matrix */ 1528 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1534 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1535 { 1536 PetscErrorCode ierr; 1537 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1538 1539 PetscFunctionBegin; 1540 if (applytranspose) { 1541 if (pcbddc->local_auxmat1) { 1542 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1543 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1544 } 1545 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1546 } else { 1547 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1548 if (pcbddc->local_auxmat1) { 1549 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1550 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1551 } 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1559 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1560 { 1561 PetscErrorCode ierr; 1562 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1563 PC_IS* pcis = (PC_IS*) (pc->data); 1564 const PetscScalar zero = 0.0; 1565 1566 PetscFunctionBegin; 1567 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1568 if (applytranspose) { 1569 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1570 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1571 } else { 1572 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1573 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1574 } 1575 /* start communications from local primal nodes to rhs of coarse solver */ 1576 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1577 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 1580 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1581 /* TODO remove null space when doing multilevel */ 1582 if (pcbddc->coarse_ksp) { 1583 if (applytranspose) { 1584 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1585 } else { 1586 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1587 } 1588 } 1589 1590 /* Local solution on R nodes */ 1591 if (pcis->n) { 1592 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 if (pcbddc->switch_static) { 1596 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1597 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 } 1599 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1600 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1601 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 if (pcbddc->switch_static) { 1604 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 } 1607 } 1608 1609 /* communications from coarse sol to local primal nodes */ 1610 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1611 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 1613 /* Sum contributions from two levels */ 1614 if (applytranspose) { 1615 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1617 } else { 1618 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1627 { 1628 PetscErrorCode ierr; 1629 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1630 PetscScalar *array; 1631 Vec from,to; 1632 1633 PetscFunctionBegin; 1634 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1635 from = pcbddc->coarse_vec; 1636 to = pcbddc->vec1_P; 1637 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1638 Vec tvec; 1639 1640 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1641 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1642 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1643 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1644 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1645 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1646 } 1647 } else { /* from local to global -> put data in coarse right hand side */ 1648 from = pcbddc->vec1_P; 1649 to = pcbddc->coarse_vec; 1650 } 1651 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 1655 #undef __FUNCT__ 1656 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1657 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1658 { 1659 PetscErrorCode ierr; 1660 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1661 PetscScalar *array; 1662 Vec from,to; 1663 1664 PetscFunctionBegin; 1665 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1666 from = pcbddc->coarse_vec; 1667 to = pcbddc->vec1_P; 1668 } else { /* from local to global -> put data in coarse right hand side */ 1669 from = pcbddc->vec1_P; 1670 to = pcbddc->coarse_vec; 1671 } 1672 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1673 if (smode == SCATTER_FORWARD) { 1674 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1675 Vec tvec; 1676 1677 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1678 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1679 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1680 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1681 } 1682 } else { 1683 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1684 ierr = VecResetArray(from);CHKERRQ(ierr); 1685 } 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 /* uncomment for testing purposes */ 1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1695 { 1696 PetscErrorCode ierr; 1697 PC_IS* pcis = (PC_IS*)(pc->data); 1698 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1699 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1700 /* one and zero */ 1701 PetscScalar one=1.0,zero=0.0; 1702 /* space to store constraints and their local indices */ 1703 PetscScalar *temp_quadrature_constraint; 1704 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1705 /* iterators */ 1706 PetscInt i,j,k,total_counts,temp_start_ptr; 1707 /* stuff to store connected components stored in pcbddc->mat_graph */ 1708 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1709 PetscInt n_ISForFaces,n_ISForEdges; 1710 /* near null space stuff */ 1711 MatNullSpace nearnullsp; 1712 const Vec *nearnullvecs; 1713 Vec *localnearnullsp; 1714 PetscBool nnsp_has_cnst; 1715 PetscInt nnsp_size; 1716 PetscScalar *array; 1717 /* BLAS integers */ 1718 PetscBLASInt lwork,lierr; 1719 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1720 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1721 /* LAPACK working arrays for SVD or POD */ 1722 PetscBool skip_lapack; 1723 PetscScalar *work; 1724 PetscReal *singular_vals; 1725 #if defined(PETSC_USE_COMPLEX) 1726 PetscReal *rwork; 1727 #endif 1728 #if defined(PETSC_MISSING_LAPACK_GESVD) 1729 PetscBLASInt Blas_one_2=1; 1730 PetscScalar *temp_basis,*correlation_mat; 1731 #else 1732 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1733 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1734 #endif 1735 /* reuse */ 1736 PetscInt olocal_primal_size; 1737 PetscInt *oprimal_indices_local_idxs; 1738 /* change of basis */ 1739 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1740 PetscBool boolforchange,qr_needed; 1741 PetscBT touched,change_basis,qr_needed_idx; 1742 /* auxiliary stuff */ 1743 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1744 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1745 PetscScalar *temp_quadrature_constraint_work=NULL; 1746 /* some quantities */ 1747 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1748 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1749 1750 1751 PetscFunctionBegin; 1752 /* Destroy Mat objects computed previously */ 1753 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1754 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1755 /* TODO synch with adaptive selection */ 1756 /* Get index sets for faces, edges and vertices from graph */ 1757 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1758 /* free unneeded index sets */ 1759 if (!pcbddc->use_vertices) { 1760 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1761 } 1762 if (!pcbddc->use_edges) { 1763 for (i=0;i<n_ISForEdges;i++) { 1764 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1767 n_ISForEdges = 0; 1768 } 1769 if (!pcbddc->use_faces) { 1770 for (i=0;i<n_ISForFaces;i++) { 1771 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1772 } 1773 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1774 n_ISForFaces = 0; 1775 } 1776 /* HACKS (the following two blocks of code) */ 1777 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1778 pcbddc->use_change_of_basis = PETSC_TRUE; 1779 if (!ISForEdges) { 1780 pcbddc->use_change_on_faces = PETSC_TRUE; 1781 } 1782 } 1783 if (pcbddc->NullSpace) { 1784 /* use_change_of_basis should be consistent among processors */ 1785 PetscBool tbool[2],gbool[2]; 1786 tbool [0] = pcbddc->use_change_of_basis; 1787 tbool [1] = pcbddc->use_change_on_faces; 1788 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1789 pcbddc->use_change_of_basis = gbool[0]; 1790 pcbddc->use_change_on_faces = gbool[1]; 1791 } 1792 /* print some info */ 1793 if (pcbddc->dbg_flag) { 1794 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1796 i = 0; 1797 if (ISForVertices) { 1798 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1799 } 1800 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1803 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1804 } 1805 1806 if (!pcbddc->adaptive_selection) { 1807 /* check if near null space is attached to global mat */ 1808 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1809 if (nearnullsp) { 1810 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1811 /* remove any stored info */ 1812 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1813 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1814 /* store information for BDDC solver reuse */ 1815 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1816 pcbddc->onearnullspace = nearnullsp; 1817 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1818 for (i=0;i<nnsp_size;i++) { 1819 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1820 } 1821 } else { /* if near null space is not provided BDDC uses constants by default */ 1822 nnsp_size = 0; 1823 nnsp_has_cnst = PETSC_TRUE; 1824 } 1825 /* get max number of constraints on a single cc */ 1826 max_constraints = nnsp_size; 1827 if (nnsp_has_cnst) max_constraints++; 1828 1829 /* 1830 Evaluate maximum storage size needed by the procedure 1831 - temp_indices will contain start index of each constraint stored as follows 1832 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1833 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1834 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1835 */ 1836 total_counts = n_ISForFaces+n_ISForEdges; 1837 total_counts *= max_constraints; 1838 n_vertices = 0; 1839 if (ISForVertices) { 1840 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1841 } 1842 total_counts += n_vertices; 1843 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1844 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1845 total_counts = 0; 1846 max_size_of_constraint = 0; 1847 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1848 if (i<n_ISForEdges) { 1849 used_IS = &ISForEdges[i]; 1850 } else { 1851 used_IS = &ISForFaces[i-n_ISForEdges]; 1852 } 1853 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1854 total_counts += j; 1855 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1856 } 1857 total_counts *= max_constraints; 1858 total_counts += n_vertices; 1859 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1860 /* get local part of global near null space vectors */ 1861 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1862 for (k=0;k<nnsp_size;k++) { 1863 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 } 1867 1868 /* whether or not to skip lapack calls */ 1869 skip_lapack = PETSC_TRUE; 1870 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1871 1872 /* allocate some auxiliary stuff */ 1873 if (!skip_lapack || pcbddc->use_qr_single) { 1874 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1875 } 1876 1877 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1878 if (!skip_lapack) { 1879 PetscScalar temp_work; 1880 1881 #if defined(PETSC_MISSING_LAPACK_GESVD) 1882 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1883 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1884 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1885 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1886 #if defined(PETSC_USE_COMPLEX) 1887 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1888 #endif 1889 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1890 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1891 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1892 lwork = -1; 1893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1894 #if !defined(PETSC_USE_COMPLEX) 1895 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1896 #else 1897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1898 #endif 1899 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1900 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1901 #else /* on missing GESVD */ 1902 /* SVD */ 1903 PetscInt max_n,min_n; 1904 max_n = max_size_of_constraint; 1905 min_n = max_constraints; 1906 if (max_size_of_constraint < max_constraints) { 1907 min_n = max_size_of_constraint; 1908 max_n = max_constraints; 1909 } 1910 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1911 #if defined(PETSC_USE_COMPLEX) 1912 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1913 #endif 1914 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1915 lwork = -1; 1916 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1917 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1918 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1920 #if !defined(PETSC_USE_COMPLEX) 1921 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1922 #else 1923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1924 #endif 1925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1926 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1927 #endif /* on missing GESVD */ 1928 /* Allocate optimal workspace */ 1929 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1930 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1931 } 1932 /* Now we can loop on constraining sets */ 1933 total_counts = 0; 1934 temp_indices[0] = 0; 1935 /* vertices */ 1936 if (ISForVertices) { 1937 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1938 if (nnsp_has_cnst) { /* consider all vertices */ 1939 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1940 for (i=0;i<n_vertices;i++) { 1941 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1942 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1943 total_counts++; 1944 } 1945 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1946 PetscBool used_vertex; 1947 for (i=0;i<n_vertices;i++) { 1948 used_vertex = PETSC_FALSE; 1949 k = 0; 1950 while (!used_vertex && k<nnsp_size) { 1951 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1952 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1953 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1954 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1955 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1956 total_counts++; 1957 used_vertex = PETSC_TRUE; 1958 } 1959 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1960 k++; 1961 } 1962 } 1963 } 1964 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1965 n_vertices = total_counts; 1966 } 1967 1968 /* edges and faces */ 1969 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1970 if (ncc<n_ISForEdges) { 1971 used_IS = &ISForEdges[ncc]; 1972 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1973 } else { 1974 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1975 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1976 } 1977 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1978 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1979 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1980 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1981 /* change of basis should not be performed on local periodic nodes */ 1982 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1983 if (nnsp_has_cnst) { 1984 PetscScalar quad_value; 1985 temp_constraints++; 1986 if (!pcbddc->use_nnsp_true) { 1987 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1988 } else { 1989 quad_value = 1.0; 1990 } 1991 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (j=0;j<size_of_constraint;j++) { 1993 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1994 } 1995 /* sort by global ordering if using lapack subroutines (not needed!) */ 1996 if (!skip_lapack || pcbddc->use_qr_single) { 1997 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1998 for (j=0;j<size_of_constraint;j++) { 1999 permutation[j]=j; 2000 } 2001 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2002 for (j=0;j<size_of_constraint;j++) { 2003 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2004 } 2005 for (j=0;j<size_of_constraint;j++) { 2006 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2007 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2008 } 2009 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2010 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2011 } 2012 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2013 total_counts++; 2014 } 2015 for (k=0;k<nnsp_size;k++) { 2016 PetscReal real_value; 2017 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2018 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2019 for (j=0;j<size_of_constraint;j++) { 2020 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2021 } 2022 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2023 /* check if array is null on the connected component */ 2024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2025 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2026 if (real_value > 0.0) { /* keep indices and values */ 2027 /* sort by global ordering if using lapack subroutines */ 2028 if (!skip_lapack || pcbddc->use_qr_single) { 2029 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2030 for (j=0;j<size_of_constraint;j++) { 2031 permutation[j]=j; 2032 } 2033 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2034 for (j=0;j<size_of_constraint;j++) { 2035 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2036 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2037 } 2038 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2039 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2040 } 2041 temp_constraints++; 2042 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2043 total_counts++; 2044 } 2045 } 2046 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2047 valid_constraints = temp_constraints; 2048 if (!pcbddc->use_nnsp_true && temp_constraints) { 2049 if (temp_constraints == 1) { /* just normalize the constraint */ 2050 PetscScalar norm; 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2052 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2053 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2054 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2055 } else { /* perform SVD */ 2056 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2057 2058 #if defined(PETSC_MISSING_LAPACK_GESVD) 2059 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2060 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2061 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2062 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2063 from that computed using LAPACKgesvd 2064 -> This is due to a different computation of eigenvectors in LAPACKheev 2065 -> The quality of the POD-computed basis will be the same */ 2066 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2067 /* Store upper triangular part of correlation matrix */ 2068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2069 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2070 for (j=0;j<temp_constraints;j++) { 2071 for (k=0;k<j+1;k++) { 2072 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2073 } 2074 } 2075 /* compute eigenvalues and eigenvectors of correlation matrix */ 2076 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2078 #if !defined(PETSC_USE_COMPLEX) 2079 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2080 #else 2081 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2082 #endif 2083 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2084 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2085 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2086 j = 0; 2087 while (j < temp_constraints && singular_vals[j] < tol) j++; 2088 total_counts = total_counts-j; 2089 valid_constraints = temp_constraints-j; 2090 /* scale and copy POD basis into used quadrature memory */ 2091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2097 if (j<temp_constraints) { 2098 PetscInt ii; 2099 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2100 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2101 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2103 for (k=0;k<temp_constraints-j;k++) { 2104 for (ii=0;ii<size_of_constraint;ii++) { 2105 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2106 } 2107 } 2108 } 2109 #else /* on missing GESVD */ 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2113 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2114 #if !defined(PETSC_USE_COMPLEX) 2115 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2116 #else 2117 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2118 #endif 2119 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2120 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2121 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2122 k = temp_constraints; 2123 if (k > size_of_constraint) k = size_of_constraint; 2124 j = 0; 2125 while (j < k && singular_vals[k-j-1] < tol) j++; 2126 valid_constraints = k-j; 2127 total_counts = total_counts-temp_constraints+valid_constraints; 2128 #endif /* on missing GESVD */ 2129 } 2130 } 2131 /* setting change_of_basis flag is safe now */ 2132 if (boolforchange) { 2133 for (j=0;j<valid_constraints;j++) { 2134 PetscBTSet(change_basis,total_counts-j-1); 2135 } 2136 } 2137 } 2138 /* free workspace */ 2139 if (!skip_lapack || pcbddc->use_qr_single) { 2140 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2141 } 2142 if (!skip_lapack) { 2143 ierr = PetscFree(work);CHKERRQ(ierr); 2144 #if defined(PETSC_USE_COMPLEX) 2145 ierr = PetscFree(rwork);CHKERRQ(ierr); 2146 #endif 2147 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2148 #if defined(PETSC_MISSING_LAPACK_GESVD) 2149 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2150 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2151 #endif 2152 } 2153 for (k=0;k<nnsp_size;k++) { 2154 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2155 } 2156 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2157 } else { 2158 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2159 PetscInt cum = 0; 2160 2161 total_counts = 0; 2162 n_vertices = 0; 2163 if (sub_schurs->is_Ej_com) { 2164 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2165 } 2166 max_constraints = 0; 2167 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2168 total_counts += pcbddc->adaptive_constraints_n[i]; 2169 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2170 } 2171 temp_indices = pcbddc->adaptive_constraints_ptrs; 2172 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2173 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2174 2175 #if 0 2176 printf("Found %d totals\n",total_counts); 2177 for (i=0;i<total_counts;i++) { 2178 printf("const %d, start %d",i,temp_indices[i]); 2179 printf(" end %d:\n",temp_indices[i+1]); 2180 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2181 printf(" idxs %d",temp_indices_to_constraint[j]); 2182 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2183 } 2184 } 2185 for (i=0;i<n_vertices;i++) { 2186 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2187 } 2188 for (i=0;i<sub_schurs->n_subs;i++) { 2189 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2190 } 2191 #endif 2192 2193 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2194 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2195 /* Change of basis */ 2196 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2197 if (pcbddc->use_change_of_basis) { 2198 cum = n_vertices; 2199 for (i=0;i<sub_schurs->n_subs;i++) { 2200 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2201 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2202 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2203 } 2204 } 2205 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2206 } 2207 } 2208 } 2209 2210 /* free index sets of faces, edges and vertices */ 2211 for (i=0;i<n_ISForFaces;i++) { 2212 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2213 } 2214 if (n_ISForFaces) { 2215 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2216 } 2217 for (i=0;i<n_ISForEdges;i++) { 2218 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2219 } 2220 if (n_ISForEdges) { 2221 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2222 } 2223 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2224 2225 /* map temp_indices_to_constraint in boundary numbering */ 2226 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2227 if (i != temp_indices[total_counts]) { 2228 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2229 } 2230 2231 /* set quantities in pcbddc data structure and store previous primal size */ 2232 /* n_vertices defines the number of subdomain corners in the primal space */ 2233 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2234 olocal_primal_size = pcbddc->local_primal_size; 2235 pcbddc->local_primal_size = total_counts; 2236 pcbddc->n_vertices = n_vertices; 2237 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2238 2239 /* Create constraint matrix */ 2240 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2241 /* so we need to set it up properly either with or without change of basis */ 2242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2243 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2244 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2245 /* array to compute a local numbering of constraints : vertices first then constraints */ 2246 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2247 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2248 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2249 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2250 /* auxiliary stuff for basis change */ 2251 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2252 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2253 2254 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2255 total_primal_vertices=0; 2256 for (i=0;i<pcbddc->local_primal_size;i++) { 2257 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2258 if (size_of_constraint == 1) { 2259 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2260 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2261 aux_primal_minloc[total_primal_vertices]=0; 2262 total_primal_vertices++; 2263 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2264 PetscInt min_loc,min_index; 2265 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2266 /* find first untouched local node */ 2267 k = 0; 2268 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2269 min_index = global_indices[k]; 2270 min_loc = k; 2271 /* search the minimum among global nodes already untouched on the cc */ 2272 for (k=1;k<size_of_constraint;k++) { 2273 /* there can be more than one constraint on a single connected component */ 2274 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2275 min_index = global_indices[k]; 2276 min_loc = k; 2277 } 2278 } 2279 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2280 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2281 aux_primal_minloc[total_primal_vertices]=min_loc; 2282 total_primal_vertices++; 2283 } 2284 } 2285 /* determine if a QR strategy is needed for change of basis */ 2286 qr_needed = PETSC_FALSE; 2287 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2288 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2289 if (PetscBTLookup(change_basis,i)) { 2290 if (!pcbddc->use_qr_single) { 2291 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2292 j = 0; 2293 for (k=0;k<size_of_constraint;k++) { 2294 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2295 j++; 2296 } 2297 } 2298 /* found more than one primal dof on the cc */ 2299 if (j > 1) { 2300 PetscBTSet(qr_needed_idx,i); 2301 qr_needed = PETSC_TRUE; 2302 } 2303 } else { 2304 PetscBTSet(qr_needed_idx,i); 2305 qr_needed = PETSC_TRUE; 2306 } 2307 } 2308 } 2309 /* free workspace */ 2310 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2311 2312 /* permute indices in order to have a sorted set of vertices */ 2313 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2314 2315 /* nonzero structure of constraint matrix */ 2316 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2317 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2318 j=total_primal_vertices; 2319 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2320 if (!PetscBTLookup(change_basis,i)) { 2321 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2322 j++; 2323 } 2324 } 2325 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2326 ierr = PetscFree(nnz);CHKERRQ(ierr); 2327 /* set values in constraint matrix */ 2328 for (i=0;i<total_primal_vertices;i++) { 2329 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2330 } 2331 total_counts = total_primal_vertices; 2332 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2333 if (!PetscBTLookup(change_basis,i)) { 2334 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2335 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2336 total_counts++; 2337 } 2338 } 2339 /* assembling */ 2340 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 /* 2343 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2344 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2345 */ 2346 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2347 if (pcbddc->use_change_of_basis) { 2348 /* dual and primal dofs on a single cc */ 2349 PetscInt dual_dofs,primal_dofs; 2350 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2351 PetscInt primal_counter; 2352 /* working stuff for GEQRF */ 2353 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2354 PetscBLASInt lqr_work; 2355 /* working stuff for UNGQR */ 2356 PetscScalar *gqr_work,lgqr_work_t; 2357 PetscBLASInt lgqr_work; 2358 /* working stuff for TRTRS */ 2359 PetscScalar *trs_rhs; 2360 PetscBLASInt Blas_NRHS; 2361 /* pointers for values insertion into change of basis matrix */ 2362 PetscInt *start_rows,*start_cols; 2363 PetscScalar *start_vals; 2364 /* working stuff for values insertion */ 2365 PetscBT is_primal; 2366 /* matrix sizes */ 2367 PetscInt global_size,local_size; 2368 /* temporary change of basis */ 2369 Mat localChangeOfBasisMatrix; 2370 2371 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2372 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2373 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2374 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2375 /* nonzeros for local mat */ 2376 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2377 for (i=0;i<pcis->n;i++) nnz[i]=1; 2378 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2379 if (PetscBTLookup(change_basis,i)) { 2380 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2381 if (PetscBTLookup(qr_needed_idx,i)) { 2382 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2383 } else { 2384 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2385 /* get local primal index on the cc */ 2386 j = 0; 2387 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2388 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2389 } 2390 } 2391 } 2392 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2393 ierr = PetscFree(nnz);CHKERRQ(ierr); 2394 /* Set initial identity in the matrix */ 2395 for (i=0;i<pcis->n;i++) { 2396 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2397 } 2398 2399 if (pcbddc->dbg_flag) { 2400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2402 } 2403 2404 2405 /* Now we loop on the constraints which need a change of basis */ 2406 /* 2407 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2408 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2409 2410 Basic blocks of change of basis matrix T computed by 2411 2412 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2413 2414 | 1 0 ... 0 s_1/S | 2415 | 0 1 ... 0 s_2/S | 2416 | ... | 2417 | 0 ... 1 s_{n-1}/S | 2418 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2419 2420 with S = \sum_{i=1}^n s_i^2 2421 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2422 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2423 2424 - QR decomposition of constraints otherwise 2425 */ 2426 if (qr_needed) { 2427 /* space to store Q */ 2428 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2429 /* first we issue queries for optimal work */ 2430 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2431 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2432 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2433 lqr_work = -1; 2434 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2435 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2436 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2437 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2438 lgqr_work = -1; 2439 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2440 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2441 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2443 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2444 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2445 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2446 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2447 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2448 /* array to store scaling factors for reflectors */ 2449 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2450 /* array to store rhs and solution of triangular solver */ 2451 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2452 /* allocating workspace for check */ 2453 if (pcbddc->dbg_flag) { 2454 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2455 } 2456 } 2457 /* array to store whether a node is primal or not */ 2458 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2459 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2460 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2461 if (i != total_primal_vertices) { 2462 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2463 } 2464 for (i=0;i<total_primal_vertices;i++) { 2465 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2468 2469 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2470 /* -> using implicit ordering contained in temp_indices data */ 2471 total_counts = pcbddc->n_vertices; 2472 primal_counter = total_counts; 2473 while (total_counts<pcbddc->local_primal_size) { 2474 primal_dofs = 1; 2475 if (PetscBTLookup(change_basis,total_counts)) { 2476 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2477 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2478 primal_dofs++; 2479 } 2480 /* get constraint info */ 2481 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2482 dual_dofs = size_of_constraint-primal_dofs; 2483 2484 if (pcbddc->dbg_flag) { 2485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2486 } 2487 2488 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2489 2490 /* copy quadrature constraints for change of basis check */ 2491 if (pcbddc->dbg_flag) { 2492 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2493 } 2494 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2495 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2496 2497 /* compute QR decomposition of constraints */ 2498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2502 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2505 2506 /* explictly compute R^-T */ 2507 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2508 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2509 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2510 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2513 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2514 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2515 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2516 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2517 2518 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2521 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2523 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2524 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2527 2528 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2529 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2530 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2532 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2538 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2540 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2541 2542 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2543 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2544 /* insert cols for primal dofs */ 2545 for (j=0;j<primal_dofs;j++) { 2546 start_vals = &qr_basis[j*size_of_constraint]; 2547 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2548 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2549 } 2550 /* insert cols for dual dofs */ 2551 for (j=0,k=0;j<dual_dofs;k++) { 2552 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2553 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2554 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2555 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2556 j++; 2557 } 2558 } 2559 2560 /* check change of basis */ 2561 if (pcbddc->dbg_flag) { 2562 PetscInt ii,jj; 2563 PetscBool valid_qr=PETSC_TRUE; 2564 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2570 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2571 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2572 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2573 for (jj=0;jj<size_of_constraint;jj++) { 2574 for (ii=0;ii<primal_dofs;ii++) { 2575 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2576 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2577 } 2578 } 2579 if (!valid_qr) { 2580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2581 for (jj=0;jj<size_of_constraint;jj++) { 2582 for (ii=0;ii<primal_dofs;ii++) { 2583 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2584 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2585 } 2586 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2588 } 2589 } 2590 } 2591 } else { 2592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2593 } 2594 } 2595 } else { /* simple transformation block */ 2596 PetscInt row,col; 2597 PetscScalar val,norm; 2598 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2601 for (j=0;j<size_of_constraint;j++) { 2602 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2603 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2604 if (!PetscBTLookup(is_primal,row_B)) { 2605 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2606 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2607 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2608 } else { 2609 for (k=0;k<size_of_constraint;k++) { 2610 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2611 if (row != col) { 2612 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2613 } else { 2614 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2615 } 2616 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 } 2619 } 2620 if (pcbddc->dbg_flag) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2622 } 2623 } 2624 /* increment primal counter */ 2625 primal_counter += primal_dofs; 2626 } else { 2627 if (pcbddc->dbg_flag) { 2628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2629 } 2630 } 2631 /* increment constraint counter total_counts */ 2632 total_counts += primal_dofs; 2633 } 2634 2635 /* free workspace */ 2636 if (qr_needed) { 2637 if (pcbddc->dbg_flag) { 2638 ierr = PetscFree(work);CHKERRQ(ierr); 2639 } 2640 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2641 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2642 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2643 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2644 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2645 } 2646 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2647 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 2650 /* assembling of global change of variable */ 2651 { 2652 Mat tmat; 2653 PetscInt bs; 2654 2655 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2656 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2657 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2658 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2659 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2660 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2661 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2662 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2663 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2664 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2665 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2666 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2668 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2669 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2671 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2672 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2673 } 2674 /* check */ 2675 if (pcbddc->dbg_flag) { 2676 PetscReal error; 2677 Vec x,x_change; 2678 2679 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2680 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2681 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2682 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2683 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2686 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2688 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2689 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2690 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2693 ierr = VecDestroy(&x);CHKERRQ(ierr); 2694 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2695 } 2696 2697 /* adapt sub_schurs computed (if any) */ 2698 if (pcbddc->use_deluxe_scaling) { 2699 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2700 if (sub_schurs->n_subs_par_g) { 2701 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2702 } 2703 if (sub_schurs->S_Ej_all) { 2704 Mat S_1,S_2,tmat; 2705 IS is_all_N; 2706 2707 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2708 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2709 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2710 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2711 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2712 sub_schurs->S_Ej_all = S_1; 2713 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2714 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2715 sub_schurs->sum_S_Ej_all = S_2; 2716 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2717 } 2718 } 2719 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2720 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2721 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2722 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2723 } 2724 2725 /* set up change of basis context */ 2726 if (pcbddc->ChangeOfBasisMatrix) { 2727 PCBDDCChange_ctx change_ctx; 2728 2729 if (!pcbddc->new_global_mat) { 2730 PetscInt global_size,local_size; 2731 2732 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2733 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2734 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2735 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2736 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2737 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2738 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2739 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2740 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2741 } else { 2742 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2743 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2744 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2745 } 2746 if (!pcbddc->user_ChangeOfBasisMatrix) { 2747 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2748 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2749 } else { 2750 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2751 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2752 } 2753 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2754 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2755 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 } 2758 2759 /* get indices in local ordering for vertices and constraints */ 2760 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2761 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2762 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2763 } 2764 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2765 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2766 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2767 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2768 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2769 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2771 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2773 /* set quantities in PCBDDC data struct */ 2774 pcbddc->n_actual_vertices = i; 2775 /* check if a new primal space has been introduced */ 2776 pcbddc->new_primal_space_local = PETSC_TRUE; 2777 if (olocal_primal_size == pcbddc->local_primal_size) { 2778 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2779 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2780 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2781 } 2782 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2783 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2784 2785 /* flush dbg viewer */ 2786 if (pcbddc->dbg_flag) { 2787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2788 } 2789 2790 /* free workspace */ 2791 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2792 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2793 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2794 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2795 if (!pcbddc->adaptive_selection) { 2796 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2797 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2798 } else { 2799 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2800 pcbddc->adaptive_constraints_ptrs, 2801 pcbddc->adaptive_constraints_idxs, 2802 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2803 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2804 } 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2811 { 2812 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2813 PC_IS *pcis = (PC_IS*)pc->data; 2814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2815 PetscInt ierr,i,vertex_size; 2816 PetscViewer viewer=pcbddc->dbg_viewer; 2817 2818 PetscFunctionBegin; 2819 /* Reset previously computed graph */ 2820 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2821 /* Init local Graph struct */ 2822 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2823 2824 /* Check validity of the csr graph passed in by the user */ 2825 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2826 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2827 } 2828 2829 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2830 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2831 PetscInt *xadj,*adjncy; 2832 PetscInt nvtxs; 2833 2834 if (pcbddc->use_local_adj) { 2835 PetscBool flg_row=PETSC_FALSE; 2836 2837 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2838 if (flg_row) { 2839 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2840 pcbddc->computed_rowadj = PETSC_TRUE; 2841 } 2842 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2843 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2844 IS is_dummy; 2845 ISLocalToGlobalMapping l2gmap_dummy; 2846 PetscInt j,sum; 2847 PetscInt *cxadj,*cadjncy; 2848 const PetscInt *idxs; 2849 PCBDDCGraph graph; 2850 PetscBT is_on_boundary; 2851 2852 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2853 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2854 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2856 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2857 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2858 graph->xadj = xadj; 2859 graph->adjncy = adjncy; 2860 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2861 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2862 2863 if (pcbddc->dbg_flag) { 2864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2865 for (i=0;i<graph->ncc;i++) { 2866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2867 } 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 } 2870 2871 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2872 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2873 for (i=0;i<pcis->n_B;i++) { 2874 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2875 } 2876 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2877 2878 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2879 sum = 0; 2880 for (i=0;i<graph->ncc;i++) { 2881 PetscInt sizecc = 0; 2882 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2883 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2884 sizecc++; 2885 } 2886 } 2887 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2888 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2889 cxadj[graph->queue[j]] = sizecc; 2890 } 2891 } 2892 sum += sizecc*sizecc; 2893 } 2894 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2895 sum = 0; 2896 for (i=0;i<nvtxs;i++) { 2897 PetscInt temp = cxadj[i]; 2898 cxadj[i] = sum; 2899 sum += temp; 2900 } 2901 cxadj[nvtxs] = sum; 2902 for (i=0;i<graph->ncc;i++) { 2903 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2904 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2905 PetscInt k,sizecc = 0; 2906 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2907 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2908 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2909 sizecc++; 2910 } 2911 } 2912 } 2913 } 2914 } 2915 if (nvtxs) { 2916 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2917 } else { 2918 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2919 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2920 } 2921 graph->xadj = 0; 2922 graph->adjncy = 0; 2923 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2924 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2925 } 2926 } 2927 2928 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2929 vertex_size = 1; 2930 if (pcbddc->user_provided_isfordofs) { 2931 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2932 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2933 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2934 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2935 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2936 } 2937 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2938 pcbddc->n_ISForDofs = 0; 2939 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2940 } 2941 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2942 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2943 } else { 2944 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2945 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2948 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2949 } 2950 } 2951 } 2952 2953 /* Setup of Graph */ 2954 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2955 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2956 } 2957 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2958 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2959 } 2960 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2961 2962 /* Graph's connected components analysis */ 2963 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2964 2965 /* print some info to stdout */ 2966 if (pcbddc->dbg_flag) { 2967 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2968 } 2969 2970 /* mark topography has done */ 2971 pcbddc->recompute_topography = PETSC_FALSE; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2977 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2978 { 2979 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2980 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2981 PetscErrorCode ierr; 2982 2983 PetscFunctionBegin; 2984 n = 0; 2985 vertices = 0; 2986 if (pcbddc->ConstraintMatrix) { 2987 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2988 for (i=0;i<local_primal_size;i++) { 2989 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2990 if (size_of_constraint == 1) n++; 2991 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2992 } 2993 if (vertices_idx) { 2994 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2995 n = 0; 2996 for (i=0;i<local_primal_size;i++) { 2997 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2998 if (size_of_constraint == 1) { 2999 vertices[n++]=row_cmat_indices[0]; 3000 } 3001 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3002 } 3003 } 3004 } 3005 *n_vertices = n; 3006 if (vertices_idx) *vertices_idx = vertices; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3012 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3013 { 3014 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3015 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3016 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3017 PetscBT touched; 3018 PetscErrorCode ierr; 3019 3020 /* This function assumes that the number of local constraints per connected component 3021 is not greater than the number of nodes defined for the connected component 3022 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3023 PetscFunctionBegin; 3024 n = 0; 3025 constraints_index = 0; 3026 if (pcbddc->ConstraintMatrix) { 3027 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3028 max_size_of_constraint = 0; 3029 for (i=0;i<local_primal_size;i++) { 3030 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3031 if (size_of_constraint > 1) { 3032 n++; 3033 } 3034 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3035 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 } 3037 if (constraints_idx) { 3038 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3039 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3040 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3041 n = 0; 3042 for (i=0;i<local_primal_size;i++) { 3043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 if (size_of_constraint > 1) { 3045 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3046 /* find first untouched local node */ 3047 j = 0; 3048 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3049 min_index = row_cmat_global_indices[j]; 3050 min_loc = j; 3051 /* search the minimum among nodes not yet touched on the connected component 3052 since there can be more than one constraint on a single cc */ 3053 for (j=1;j<size_of_constraint;j++) { 3054 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3055 min_index = row_cmat_global_indices[j]; 3056 min_loc = j; 3057 } 3058 } 3059 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3060 constraints_index[n++] = row_cmat_indices[min_loc]; 3061 } 3062 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3063 } 3064 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3065 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3066 } 3067 } 3068 *n_constraints = n; 3069 if (constraints_idx) *constraints_idx = constraints_index; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "PCBDDCSubsetNumbering" 3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3076 { 3077 Vec local_vec,global_vec; 3078 IS seqis,paris; 3079 VecScatter scatter_ctx; 3080 PetscScalar *array; 3081 PetscInt *temp_global_dofs; 3082 PetscScalar globalsum; 3083 PetscInt i,j,s; 3084 PetscInt nlocals,first_index,old_index,max_local; 3085 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3086 PetscMPIInt *dof_sizes,*dof_displs; 3087 PetscBool first_found; 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 /* mpi buffers */ 3092 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3093 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3094 j = ( !rank_prec_comm ? size_prec_comm : 0); 3095 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3096 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3097 /* get maximum size of subset */ 3098 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3099 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3100 max_local = 0; 3101 for (i=0;i<n_local_dofs;i++) { 3102 if (max_local < temp_global_dofs[i] ) { 3103 max_local = temp_global_dofs[i]; 3104 } 3105 } 3106 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3107 max_global++; 3108 max_local = 0; 3109 for (i=0;i<n_local_dofs;i++) { 3110 if (max_local < local_dofs[i] ) { 3111 max_local = local_dofs[i]; 3112 } 3113 } 3114 max_local++; 3115 /* allocate workspace */ 3116 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3117 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3118 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3119 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3120 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3121 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3122 /* create scatter */ 3123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3124 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3125 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3126 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3127 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3128 /* init array */ 3129 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3130 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3131 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3132 if (local_dofs_mult) { 3133 for (i=0;i<n_local_dofs;i++) { 3134 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3135 } 3136 } else { 3137 for (i=0;i<n_local_dofs;i++) { 3138 array[local_dofs[i]]=1.0; 3139 } 3140 } 3141 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3142 /* scatter into global vec and get total number of global dofs */ 3143 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3146 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3147 /* Fill global_vec with cumulative function for global numbering */ 3148 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3149 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3150 nlocals = 0; 3151 first_index = -1; 3152 first_found = PETSC_FALSE; 3153 for (i=0;i<s;i++) { 3154 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3155 first_found = PETSC_TRUE; 3156 first_index = i; 3157 } 3158 nlocals += (PetscInt)PetscRealPart(array[i]); 3159 } 3160 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3161 if (!rank_prec_comm) { 3162 dof_displs[0]=0; 3163 for (i=1;i<size_prec_comm;i++) { 3164 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3165 } 3166 } 3167 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3168 if (first_found) { 3169 array[first_index] += (PetscScalar)nlocals; 3170 old_index = first_index; 3171 for (i=first_index+1;i<s;i++) { 3172 if (PetscRealPart(array[i]) > 0.1) { 3173 array[i] += array[old_index]; 3174 old_index = i; 3175 } 3176 } 3177 } 3178 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3179 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3180 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3181 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 /* get global ordering of local dofs */ 3183 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3184 if (local_dofs_mult) { 3185 for (i=0;i<n_local_dofs;i++) { 3186 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3187 } 3188 } else { 3189 for (i=0;i<n_local_dofs;i++) { 3190 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3191 } 3192 } 3193 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3194 /* free workspace */ 3195 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3196 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3197 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3198 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3199 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3200 /* return pointer to global ordering of local dofs */ 3201 *global_numbering_subset = temp_global_dofs; 3202 PetscFunctionReturn(0); 3203 } 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3208 { 3209 PetscInt i,j; 3210 PetscScalar *alphas; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 /* this implements stabilized Gram-Schmidt */ 3215 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3216 for (i=0;i<n;i++) { 3217 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3218 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3219 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3220 } 3221 ierr = PetscFree(alphas);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatISGetSubassemblingPattern" 3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3228 { 3229 Mat subdomain_adj; 3230 IS new_ranks,ranks_send_to; 3231 MatPartitioning partitioner; 3232 Mat_IS *matis; 3233 PetscInt n_neighs,*neighs,*n_shared,**shared; 3234 PetscInt prank; 3235 PetscMPIInt size,rank,color; 3236 PetscInt *xadj,*adjncy,*oldranks; 3237 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3238 PetscInt i,local_size,threshold=0; 3239 PetscErrorCode ierr; 3240 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3241 PetscSubcomm subcomm; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3245 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3246 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3247 3248 /* Get info on mapping */ 3249 matis = (Mat_IS*)(mat->data); 3250 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3251 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3252 3253 /* build local CSR graph of subdomains' connectivity */ 3254 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3255 xadj[0] = 0; 3256 xadj[1] = PetscMax(n_neighs-1,0); 3257 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3259 3260 if (threshold) { 3261 PetscInt xadj_count = 0; 3262 for (i=1;i<n_neighs;i++) { 3263 if (n_shared[i] > threshold) { 3264 adjncy[xadj_count] = neighs[i]; 3265 adjncy_wgt[xadj_count] = n_shared[i]; 3266 xadj_count++; 3267 } 3268 } 3269 xadj[1] = xadj_count; 3270 } else { 3271 if (xadj[1]) { 3272 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3273 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3274 } 3275 } 3276 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3277 if (use_square) { 3278 for (i=0;i<xadj[1];i++) { 3279 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3280 } 3281 } 3282 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3283 3284 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3285 3286 /* 3287 Restrict work on active processes only. 3288 */ 3289 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3290 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3291 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3292 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3293 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3294 if (color) { 3295 ierr = PetscFree(xadj);CHKERRQ(ierr); 3296 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3297 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3298 } else { 3299 PetscInt coarsening_ratio; 3300 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3301 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3302 prank = rank; 3303 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3304 /* 3305 for (i=0;i<size;i++) { 3306 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3307 } 3308 */ 3309 for (i=0;i<xadj[1];i++) { 3310 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3311 } 3312 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3313 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3314 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3315 3316 /* Partition */ 3317 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3318 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3319 if (use_vwgt) { 3320 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3321 v_wgt[0] = local_size; 3322 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3323 } 3324 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3325 coarsening_ratio = size/n_subdomains; 3326 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3327 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3328 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3329 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3330 3331 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3332 if (contiguous) { 3333 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3334 } else { 3335 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3336 } 3337 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3338 /* clean up */ 3339 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3340 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3341 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3342 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3343 } 3344 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3345 3346 /* assemble parallel IS for sends */ 3347 i = 1; 3348 if (color) i=0; 3349 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3350 3351 /* get back IS */ 3352 *is_sends = ranks_send_to; 3353 PetscFunctionReturn(0); 3354 } 3355 3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatISSubassemble" 3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3361 { 3362 Mat local_mat; 3363 Mat_IS *matis; 3364 IS is_sends_internal; 3365 PetscInt rows,cols,new_local_rows; 3366 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3367 PetscBool ismatis,isdense,newisdense,destroy_mat; 3368 ISLocalToGlobalMapping l2gmap; 3369 PetscInt* l2gmap_indices; 3370 const PetscInt* is_indices; 3371 MatType new_local_type; 3372 /* buffers */ 3373 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3374 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3375 PetscInt *recv_buffer_idxs_local; 3376 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3377 /* MPI */ 3378 MPI_Comm comm,comm_n; 3379 PetscSubcomm subcomm; 3380 PetscMPIInt n_sends,n_recvs,commsize; 3381 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3382 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3383 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3384 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3385 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3386 PetscErrorCode ierr; 3387 3388 PetscFunctionBegin; 3389 /* TODO: add missing checks */ 3390 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3391 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3392 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3393 PetscValidLogicalCollectiveInt(mat,nis,7); 3394 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3395 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3396 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3397 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3398 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3399 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3400 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3401 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3402 PetscInt mrows,mcols,mnrows,mncols; 3403 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3404 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3405 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3406 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3407 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3408 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3409 } 3410 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3411 PetscValidLogicalCollectiveInt(mat,bs,0); 3412 /* prepare IS for sending if not provided */ 3413 if (!is_sends) { 3414 PetscBool pcontig = PETSC_TRUE; 3415 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3416 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3417 } else { 3418 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3419 is_sends_internal = is_sends; 3420 } 3421 3422 /* get pointer of MATIS data */ 3423 matis = (Mat_IS*)mat->data; 3424 3425 /* get comm */ 3426 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3427 3428 /* compute number of sends */ 3429 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3430 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3431 3432 /* compute number of receives */ 3433 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3434 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3435 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3436 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3437 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3438 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3439 ierr = PetscFree(iflags);CHKERRQ(ierr); 3440 3441 /* restrict comm if requested */ 3442 subcomm = 0; 3443 destroy_mat = PETSC_FALSE; 3444 if (restrict_comm) { 3445 PetscMPIInt color,subcommsize; 3446 3447 color = 0; 3448 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3449 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3450 subcommsize = commsize - subcommsize; 3451 /* check if reuse has been requested */ 3452 if (reuse == MAT_REUSE_MATRIX) { 3453 if (*mat_n) { 3454 PetscMPIInt subcommsize2; 3455 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3456 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3457 comm_n = PetscObjectComm((PetscObject)*mat_n); 3458 } else { 3459 comm_n = PETSC_COMM_SELF; 3460 } 3461 } else { /* MAT_INITIAL_MATRIX */ 3462 PetscMPIInt rank; 3463 3464 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3465 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3466 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3467 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3468 comm_n = subcomm->comm; 3469 } 3470 /* flag to destroy *mat_n if not significative */ 3471 if (color) destroy_mat = PETSC_TRUE; 3472 } else { 3473 comm_n = comm; 3474 } 3475 3476 /* prepare send/receive buffers */ 3477 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3478 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3479 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3480 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3481 if (nis) { 3482 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3483 } 3484 3485 /* Get data from local matrices */ 3486 if (!isdense) { 3487 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3488 /* TODO: See below some guidelines on how to prepare the local buffers */ 3489 /* 3490 send_buffer_vals should contain the raw values of the local matrix 3491 send_buffer_idxs should contain: 3492 - MatType_PRIVATE type 3493 - PetscInt size_of_l2gmap 3494 - PetscInt global_row_indices[size_of_l2gmap] 3495 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3496 */ 3497 } else { 3498 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3499 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3500 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3501 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3502 send_buffer_idxs[1] = i; 3503 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3504 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3506 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3507 for (i=0;i<n_sends;i++) { 3508 ilengths_vals[is_indices[i]] = len*len; 3509 ilengths_idxs[is_indices[i]] = len+2; 3510 } 3511 } 3512 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3513 /* additional is (if any) */ 3514 if (nis) { 3515 PetscMPIInt psum; 3516 PetscInt j; 3517 for (j=0,psum=0;j<nis;j++) { 3518 PetscInt plen; 3519 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3520 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3521 psum += len+1; /* indices + lenght */ 3522 } 3523 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3524 for (j=0,psum=0;j<nis;j++) { 3525 PetscInt plen; 3526 const PetscInt *is_array_idxs; 3527 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3528 send_buffer_idxs_is[psum] = plen; 3529 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3530 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3531 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3532 psum += plen+1; /* indices + lenght */ 3533 } 3534 for (i=0;i<n_sends;i++) { 3535 ilengths_idxs_is[is_indices[i]] = psum; 3536 } 3537 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3538 } 3539 3540 buf_size_idxs = 0; 3541 buf_size_vals = 0; 3542 buf_size_idxs_is = 0; 3543 for (i=0;i<n_recvs;i++) { 3544 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3545 buf_size_vals += (PetscInt)olengths_vals[i]; 3546 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3547 } 3548 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3550 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3551 3552 /* get new tags for clean communications */ 3553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3554 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3555 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3556 3557 /* allocate for requests */ 3558 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3559 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3563 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3564 3565 /* communications */ 3566 ptr_idxs = recv_buffer_idxs; 3567 ptr_vals = recv_buffer_vals; 3568 ptr_idxs_is = recv_buffer_idxs_is; 3569 for (i=0;i<n_recvs;i++) { 3570 source_dest = onodes[i]; 3571 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3572 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3573 ptr_idxs += olengths_idxs[i]; 3574 ptr_vals += olengths_vals[i]; 3575 if (nis) { 3576 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3577 ptr_idxs_is += olengths_idxs_is[i]; 3578 } 3579 } 3580 for (i=0;i<n_sends;i++) { 3581 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3582 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3583 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3584 if (nis) { 3585 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3586 } 3587 } 3588 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3589 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3590 3591 /* assemble new l2g map */ 3592 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3593 ptr_idxs = recv_buffer_idxs; 3594 new_local_rows = 0; 3595 for (i=0;i<n_recvs;i++) { 3596 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3597 ptr_idxs += olengths_idxs[i]; 3598 } 3599 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3600 ptr_idxs = recv_buffer_idxs; 3601 new_local_rows = 0; 3602 for (i=0;i<n_recvs;i++) { 3603 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3604 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3605 ptr_idxs += olengths_idxs[i]; 3606 } 3607 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3608 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3609 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3610 3611 /* infer new local matrix type from received local matrices type */ 3612 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3613 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3614 if (n_recvs) { 3615 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3616 ptr_idxs = recv_buffer_idxs; 3617 for (i=0;i<n_recvs;i++) { 3618 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3619 new_local_type_private = MATAIJ_PRIVATE; 3620 break; 3621 } 3622 ptr_idxs += olengths_idxs[i]; 3623 } 3624 switch (new_local_type_private) { 3625 case MATDENSE_PRIVATE: 3626 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3627 new_local_type = MATSEQAIJ; 3628 bs = 1; 3629 } else { /* if I receive only 1 dense matrix */ 3630 new_local_type = MATSEQDENSE; 3631 bs = 1; 3632 } 3633 break; 3634 case MATAIJ_PRIVATE: 3635 new_local_type = MATSEQAIJ; 3636 bs = 1; 3637 break; 3638 case MATBAIJ_PRIVATE: 3639 new_local_type = MATSEQBAIJ; 3640 break; 3641 case MATSBAIJ_PRIVATE: 3642 new_local_type = MATSEQSBAIJ; 3643 break; 3644 default: 3645 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3646 break; 3647 } 3648 } else { /* by default, new_local_type is seqdense */ 3649 new_local_type = MATSEQDENSE; 3650 bs = 1; 3651 } 3652 3653 /* create MATIS object if needed */ 3654 if (reuse == MAT_INITIAL_MATRIX) { 3655 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3656 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3657 } else { 3658 /* it also destroys the local matrices */ 3659 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3660 } 3661 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3662 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3663 3664 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3665 3666 /* Global to local map of received indices */ 3667 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3668 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3670 3671 /* restore attributes -> type of incoming data and its size */ 3672 buf_size_idxs = 0; 3673 for (i=0;i<n_recvs;i++) { 3674 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3675 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3676 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3677 } 3678 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3679 3680 /* set preallocation */ 3681 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3682 if (!newisdense) { 3683 PetscInt *new_local_nnz=0; 3684 3685 ptr_vals = recv_buffer_vals; 3686 ptr_idxs = recv_buffer_idxs_local; 3687 if (n_recvs) { 3688 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3689 } 3690 for (i=0;i<n_recvs;i++) { 3691 PetscInt j; 3692 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3693 for (j=0;j<*(ptr_idxs+1);j++) { 3694 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3695 } 3696 } else { 3697 /* TODO */ 3698 } 3699 ptr_idxs += olengths_idxs[i]; 3700 } 3701 if (new_local_nnz) { 3702 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3703 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3704 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3705 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3706 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3707 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3708 } else { 3709 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3710 } 3711 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3712 } else { 3713 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3714 } 3715 3716 /* set values */ 3717 ptr_vals = recv_buffer_vals; 3718 ptr_idxs = recv_buffer_idxs_local; 3719 for (i=0;i<n_recvs;i++) { 3720 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3721 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3722 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3723 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3725 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3726 } else { 3727 /* TODO */ 3728 } 3729 ptr_idxs += olengths_idxs[i]; 3730 ptr_vals += olengths_vals[i]; 3731 } 3732 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3737 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3738 3739 #if 0 3740 if (!restrict_comm) { /* check */ 3741 Vec lvec,rvec; 3742 PetscReal infty_error; 3743 3744 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3745 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3746 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3747 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3748 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3749 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3750 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3751 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3752 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3753 } 3754 #endif 3755 3756 /* assemble new additional is (if any) */ 3757 if (nis) { 3758 PetscInt **temp_idxs,*count_is,j,psum; 3759 3760 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3761 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3762 ptr_idxs = recv_buffer_idxs_is; 3763 psum = 0; 3764 for (i=0;i<n_recvs;i++) { 3765 for (j=0;j<nis;j++) { 3766 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3767 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3768 psum += plen; 3769 ptr_idxs += plen+1; /* shift pointer to received data */ 3770 } 3771 } 3772 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3773 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3774 for (i=1;i<nis;i++) { 3775 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3776 } 3777 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3778 ptr_idxs = recv_buffer_idxs_is; 3779 for (i=0;i<n_recvs;i++) { 3780 for (j=0;j<nis;j++) { 3781 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3782 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3783 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3784 ptr_idxs += plen+1; /* shift pointer to received data */ 3785 } 3786 } 3787 for (i=0;i<nis;i++) { 3788 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3789 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3790 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3791 } 3792 ierr = PetscFree(count_is);CHKERRQ(ierr); 3793 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3794 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3795 } 3796 /* free workspace */ 3797 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3798 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3800 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3801 if (isdense) { 3802 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3803 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3804 } else { 3805 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3806 } 3807 if (nis) { 3808 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3809 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3810 } 3811 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3812 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3813 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3814 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3815 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3816 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3817 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3818 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3819 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3820 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3821 ierr = PetscFree(onodes);CHKERRQ(ierr); 3822 if (nis) { 3823 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3824 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3825 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3826 } 3827 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3828 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3829 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3830 for (i=0;i<nis;i++) { 3831 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3832 } 3833 } 3834 PetscFunctionReturn(0); 3835 } 3836 3837 /* temporary hack into ksp private data structure */ 3838 #include <petsc-private/kspimpl.h> 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3843 { 3844 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3845 PC_IS *pcis = (PC_IS*)pc->data; 3846 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3847 MatNullSpace CoarseNullSpace=NULL; 3848 ISLocalToGlobalMapping coarse_islg; 3849 IS coarse_is,*isarray; 3850 PetscInt i,im_active=-1,active_procs=-1; 3851 PetscInt nis,nisdofs,nisneu; 3852 PC pc_temp; 3853 PCType coarse_pc_type; 3854 KSPType coarse_ksp_type; 3855 PetscBool multilevel_requested,multilevel_allowed; 3856 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3857 Mat t_coarse_mat_is; 3858 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3859 PetscMPIInt all_procs; 3860 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3861 PetscBool compute_vecs = PETSC_FALSE; 3862 PetscScalar *array; 3863 PetscErrorCode ierr; 3864 3865 PetscFunctionBegin; 3866 /* Assign global numbering to coarse dofs */ 3867 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3868 compute_vecs = PETSC_TRUE; 3869 PetscInt ocoarse_size; 3870 ocoarse_size = pcbddc->coarse_size; 3871 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3872 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3873 /* see if we can avoid some work */ 3874 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3875 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3876 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3877 coarse_reuse = PETSC_FALSE; 3878 } else { /* we can safely reuse already computed coarse matrix */ 3879 coarse_reuse = PETSC_TRUE; 3880 } 3881 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3882 coarse_reuse = PETSC_FALSE; 3883 } 3884 /* reset any subassembling information */ 3885 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3886 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3887 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3888 coarse_reuse = PETSC_TRUE; 3889 } 3890 3891 /* count "active" (i.e. with positive local size) and "void" processes */ 3892 im_active = !!(pcis->n); 3893 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3894 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3895 void_procs = all_procs-active_procs; 3896 csin_type_simple = PETSC_TRUE; 3897 redist = PETSC_FALSE; 3898 if (pcbddc->current_level && void_procs) { 3899 csin_ml = PETSC_TRUE; 3900 ncoarse_ml = void_procs; 3901 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3902 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3903 csin_ds = PETSC_TRUE; 3904 ncoarse_ds = pcbddc->redistribute_coarse; 3905 redist = PETSC_TRUE; 3906 } else { 3907 csin_ds = PETSC_TRUE; 3908 ncoarse_ds = active_procs; 3909 redist = PETSC_TRUE; 3910 } 3911 } else { 3912 csin_ml = PETSC_FALSE; 3913 ncoarse_ml = all_procs; 3914 if (void_procs) { 3915 csin_ds = PETSC_TRUE; 3916 ncoarse_ds = void_procs; 3917 csin_type_simple = PETSC_FALSE; 3918 } else { 3919 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3920 csin_ds = PETSC_TRUE; 3921 ncoarse_ds = pcbddc->redistribute_coarse; 3922 redist = PETSC_TRUE; 3923 } else { 3924 csin_ds = PETSC_FALSE; 3925 ncoarse_ds = all_procs; 3926 } 3927 } 3928 } 3929 3930 /* 3931 test if we can go multilevel: three conditions must be satisfied: 3932 - we have not exceeded the number of levels requested 3933 - we can actually subassemble the active processes 3934 - we can find a suitable number of MPI processes where we can place the subassembled problem 3935 */ 3936 multilevel_allowed = PETSC_FALSE; 3937 multilevel_requested = PETSC_FALSE; 3938 if (pcbddc->current_level < pcbddc->max_levels) { 3939 multilevel_requested = PETSC_TRUE; 3940 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3941 multilevel_allowed = PETSC_FALSE; 3942 } else { 3943 multilevel_allowed = PETSC_TRUE; 3944 } 3945 } 3946 /* determine number of process partecipating to coarse solver */ 3947 if (multilevel_allowed) { 3948 ncoarse = ncoarse_ml; 3949 csin = csin_ml; 3950 redist = PETSC_FALSE; 3951 } else { 3952 ncoarse = ncoarse_ds; 3953 csin = csin_ds; 3954 } 3955 3956 /* creates temporary l2gmap and IS for coarse indexes */ 3957 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3958 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3959 3960 /* creates temporary MATIS object for coarse matrix */ 3961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3962 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3963 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3964 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3965 #if 0 3966 { 3967 PetscViewer viewer; 3968 char filename[256]; 3969 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3970 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3971 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3972 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3973 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3974 } 3975 #endif 3976 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3977 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3978 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3980 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3981 3982 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3983 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3984 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3985 const PetscInt *idxs; 3986 ISLocalToGlobalMapping tmap; 3987 3988 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3989 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3990 /* allocate space for temporary storage */ 3991 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3992 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3993 /* allocate for IS array */ 3994 nisdofs = pcbddc->n_ISForDofsLocal; 3995 nisneu = !!pcbddc->NeumannBoundariesLocal; 3996 nis = nisdofs + nisneu; 3997 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3998 /* dofs splitting */ 3999 for (i=0;i<nisdofs;i++) { 4000 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4001 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4002 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4003 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4004 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4005 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4006 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4007 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4008 } 4009 /* neumann boundaries */ 4010 if (pcbddc->NeumannBoundariesLocal) { 4011 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4012 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4013 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4014 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4015 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4016 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4017 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4018 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4019 } 4020 /* free memory */ 4021 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4022 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4023 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4024 } else { 4025 nis = 0; 4026 nisdofs = 0; 4027 nisneu = 0; 4028 isarray = NULL; 4029 } 4030 /* destroy no longer needed map */ 4031 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4032 4033 /* restrict on coarse candidates (if needed) */ 4034 coarse_mat_is = NULL; 4035 if (csin) { 4036 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4037 if (redist) { 4038 PetscMPIInt rank; 4039 PetscInt spc,n_spc_p1,dest[1],destsize; 4040 4041 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4042 spc = active_procs/ncoarse; 4043 n_spc_p1 = active_procs%ncoarse; 4044 if (im_active) { 4045 destsize = 1; 4046 if (rank > n_spc_p1*(spc+1)-1) { 4047 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4048 } else { 4049 dest[0] = rank/(spc+1); 4050 } 4051 } else { 4052 destsize = 0; 4053 } 4054 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4055 } else if (csin_type_simple) { 4056 PetscMPIInt rank; 4057 PetscInt issize,isidx; 4058 4059 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4060 if (im_active) { 4061 issize = 1; 4062 isidx = (PetscInt)rank; 4063 } else { 4064 issize = 0; 4065 isidx = -1; 4066 } 4067 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4068 } else { /* get a suitable subassembling pattern from MATIS code */ 4069 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4070 } 4071 4072 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4073 if (!redist || ncoarse <= void_procs) { 4074 PetscInt ncoarse_cand,tissize,*nisindices; 4075 PetscInt *coarse_candidates; 4076 const PetscInt* tisindices; 4077 4078 /* get coarse candidates' ranks in pc communicator */ 4079 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4080 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4081 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4082 if (!coarse_candidates[i]) { 4083 coarse_candidates[ncoarse_cand++]=i; 4084 } 4085 } 4086 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4087 4088 4089 if (pcbddc->dbg_flag) { 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4092 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4094 for (i=0;i<ncoarse_cand;i++) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4096 } 4097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4099 } 4100 /* shift the pattern on coarse candidates */ 4101 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4102 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4103 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4104 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4105 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4106 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4107 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4108 } 4109 if (pcbddc->dbg_flag) { 4110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4112 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 } 4115 } 4116 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4117 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4118 } else { 4119 if (pcbddc->dbg_flag) { 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4123 } 4124 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4125 coarse_mat_is = t_coarse_mat_is; 4126 } 4127 4128 /* create local to global scatters for coarse problem */ 4129 if (compute_vecs) { 4130 PetscInt lrows; 4131 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4132 if (coarse_mat_is) { 4133 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4134 } else { 4135 lrows = 0; 4136 } 4137 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4138 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4139 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4140 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4141 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4142 } 4143 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4144 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4145 4146 /* set defaults for coarse KSP and PC */ 4147 if (multilevel_allowed) { 4148 coarse_ksp_type = KSPRICHARDSON; 4149 coarse_pc_type = PCBDDC; 4150 } else { 4151 coarse_ksp_type = KSPPREONLY; 4152 coarse_pc_type = PCREDUNDANT; 4153 } 4154 4155 /* print some info if requested */ 4156 if (pcbddc->dbg_flag) { 4157 if (!multilevel_allowed) { 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4159 if (multilevel_requested) { 4160 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4161 } else if (pcbddc->max_levels) { 4162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4163 } 4164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4165 } 4166 } 4167 4168 /* create the coarse KSP object only once with defaults */ 4169 if (coarse_mat_is) { 4170 MatReuse coarse_mat_reuse; 4171 PetscViewer dbg_viewer = NULL; 4172 if (pcbddc->dbg_flag) { 4173 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4174 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4175 } 4176 if (!pcbddc->coarse_ksp) { 4177 char prefix[256],str_level[16]; 4178 size_t len; 4179 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4180 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4181 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4182 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4183 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4184 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4185 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4186 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4187 /* prefix */ 4188 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4189 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4190 if (!pcbddc->current_level) { 4191 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4192 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4193 } else { 4194 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4195 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4196 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4197 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4198 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4199 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4200 } 4201 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4202 /* allow user customization */ 4203 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4204 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4205 } 4206 4207 /* get some info after set from options */ 4208 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4211 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4212 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4213 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4214 isbddc = PETSC_FALSE; 4215 } 4216 if (isredundant) { 4217 KSP inner_ksp; 4218 PC inner_pc; 4219 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4220 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4221 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4222 } 4223 4224 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4225 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4226 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4227 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4228 if (nisdofs) { 4229 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4230 for (i=0;i<nisdofs;i++) { 4231 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4232 } 4233 } 4234 if (nisneu) { 4235 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4236 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4237 } 4238 4239 /* assemble coarse matrix */ 4240 if (coarse_reuse) { 4241 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4242 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4243 coarse_mat_reuse = MAT_REUSE_MATRIX; 4244 } else { 4245 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4246 } 4247 if (isbddc || isnn) { 4248 if (pcbddc->coarsening_ratio > 1) { 4249 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4250 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4251 if (pcbddc->dbg_flag) { 4252 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4253 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4254 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4255 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4256 } 4257 } 4258 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4259 } else { 4260 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4261 coarse_mat = coarse_mat_is; 4262 } 4263 } else { 4264 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4265 } 4266 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4267 4268 /* propagate symmetry info to coarse matrix */ 4269 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4270 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4271 4272 /* set operators */ 4273 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4274 if (pcbddc->dbg_flag) { 4275 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4276 } 4277 } else { /* processes non partecipating to coarse solver (if any) */ 4278 coarse_mat = 0; 4279 } 4280 ierr = PetscFree(isarray);CHKERRQ(ierr); 4281 #if 0 4282 { 4283 PetscViewer viewer; 4284 char filename[256]; 4285 sprintf(filename,"coarse_mat.m"); 4286 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4287 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4288 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4289 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4290 } 4291 #endif 4292 4293 /* Compute coarse null space (special handling by BDDC only) */ 4294 if (pcbddc->NullSpace) { 4295 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4296 } 4297 4298 if (pcbddc->coarse_ksp) { 4299 Vec crhs,csol; 4300 PetscBool ispreonly; 4301 if (CoarseNullSpace) { 4302 if (isbddc) { 4303 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4304 } else { 4305 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4306 } 4307 } 4308 /* setup coarse ksp */ 4309 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4310 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4311 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4312 /* hack */ 4313 if (!csol) { 4314 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4315 } 4316 if (!crhs) { 4317 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4318 } 4319 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4320 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4321 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4322 KSP check_ksp; 4323 KSPType check_ksp_type; 4324 PC check_pc; 4325 Vec check_vec,coarse_vec; 4326 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4327 PetscInt its; 4328 PetscBool compute_eigs; 4329 PetscReal *eigs_r,*eigs_c; 4330 PetscInt neigs; 4331 const char *prefix; 4332 4333 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4334 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4335 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4336 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4337 if (ispreonly) { 4338 check_ksp_type = KSPPREONLY; 4339 compute_eigs = PETSC_FALSE; 4340 } else { 4341 check_ksp_type = KSPGMRES; 4342 compute_eigs = PETSC_TRUE; 4343 } 4344 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4345 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4346 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4347 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4348 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4349 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4350 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4351 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4352 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4353 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4354 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4355 /* create random vec */ 4356 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4357 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4358 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4359 if (CoarseNullSpace) { 4360 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4361 } 4362 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4363 /* solve coarse problem */ 4364 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4365 if (CoarseNullSpace) { 4366 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4367 } 4368 /* set eigenvalue estimation if preonly has not been requested */ 4369 if (compute_eigs) { 4370 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4372 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4373 lambda_max = eigs_r[neigs-1]; 4374 lambda_min = eigs_r[0]; 4375 if (pcbddc->use_coarse_estimates) { 4376 if (lambda_max>lambda_min) { 4377 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4378 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4379 } 4380 } 4381 } 4382 4383 /* check coarse problem residual error */ 4384 if (pcbddc->dbg_flag) { 4385 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4386 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4387 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4388 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4389 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4390 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4391 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4392 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4393 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4394 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4395 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4397 if (compute_eigs) { 4398 PetscReal lambda_max_s,lambda_min_s; 4399 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4400 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4401 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4403 for (i=0;i<neigs;i++) { 4404 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4405 } 4406 } 4407 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4408 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4409 } 4410 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4411 if (compute_eigs) { 4412 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4413 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4414 } 4415 } 4416 } 4417 /* print additional info */ 4418 if (pcbddc->dbg_flag) { 4419 /* waits until all processes reaches this point */ 4420 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4423 } 4424 4425 /* free memory */ 4426 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4427 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4433 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4434 { 4435 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4436 PC_IS* pcis = (PC_IS*)pc->data; 4437 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4438 PetscInt i,coarse_size; 4439 PetscInt *local_primal_indices; 4440 PetscErrorCode ierr; 4441 4442 PetscFunctionBegin; 4443 /* Compute global number of coarse dofs */ 4444 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4445 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4446 } 4447 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4448 4449 /* check numbering */ 4450 if (pcbddc->dbg_flag) { 4451 PetscScalar coarsesum,*array; 4452 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4453 4454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4457 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4458 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4459 for (i=0;i<pcbddc->local_primal_size;i++) { 4460 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4461 } 4462 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4463 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4464 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4465 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4466 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4467 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4468 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4469 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4470 for (i=0;i<pcis->n;i++) { 4471 if (array[i] == 1.0) { 4472 set_error = PETSC_TRUE; 4473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4474 } 4475 } 4476 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4478 for (i=0;i<pcis->n;i++) { 4479 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4480 } 4481 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4482 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4483 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4485 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4487 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4491 for (i=0;i<pcbddc->local_primal_size;i++) { 4492 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4493 } 4494 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4495 } 4496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4497 if (set_error_reduced) { 4498 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4499 } 4500 } 4501 /* get back data */ 4502 *coarse_size_n = coarse_size; 4503 *local_primal_indices_n = local_primal_indices; 4504 PetscFunctionReturn(0); 4505 } 4506 4507 #undef __FUNCT__ 4508 #define __FUNCT__ "PCBDDCGlobalToLocal" 4509 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4510 { 4511 IS localis_t; 4512 PetscInt i,lsize,*idxs,n; 4513 PetscScalar *vals; 4514 PetscErrorCode ierr; 4515 4516 PetscFunctionBegin; 4517 /* get indices in local ordering exploiting local to global map */ 4518 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4519 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4520 for (i=0;i<lsize;i++) vals[i] = 1.0; 4521 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4522 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4523 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4524 if (idxs) { /* multilevel guard */ 4525 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4526 } 4527 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4528 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4529 ierr = PetscFree(vals);CHKERRQ(ierr); 4530 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4531 /* now compute set in local ordering */ 4532 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4534 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4535 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4536 for (i=0,lsize=0;i<n;i++) { 4537 if (PetscRealPart(vals[i]) > 0.5) { 4538 lsize++; 4539 } 4540 } 4541 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4542 for (i=0,lsize=0;i<n;i++) { 4543 if (PetscRealPart(vals[i]) > 0.5) { 4544 idxs[lsize++] = i; 4545 } 4546 } 4547 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4548 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4549 *localis = localis_t; 4550 PetscFunctionReturn(0); 4551 } 4552 4553 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4554 #undef __FUNCT__ 4555 #define __FUNCT__ "PCBDDCMatMult_Private" 4556 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4557 { 4558 PCBDDCChange_ctx change_ctx; 4559 PetscErrorCode ierr; 4560 4561 PetscFunctionBegin; 4562 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4563 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4564 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4565 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4566 PetscFunctionReturn(0); 4567 } 4568 4569 #undef __FUNCT__ 4570 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4571 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4572 { 4573 PCBDDCChange_ctx change_ctx; 4574 PetscErrorCode ierr; 4575 4576 PetscFunctionBegin; 4577 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4578 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4579 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4580 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4581 PetscFunctionReturn(0); 4582 } 4583 4584 #undef __FUNCT__ 4585 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4586 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4587 { 4588 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4589 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4590 PetscInt *used_xadj,*used_adjncy; 4591 PetscBool free_used_adj; 4592 PetscErrorCode ierr; 4593 4594 PetscFunctionBegin; 4595 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4596 free_used_adj = PETSC_FALSE; 4597 if (pcbddc->sub_schurs_layers == -1) { 4598 used_xadj = NULL; 4599 used_adjncy = NULL; 4600 } else { 4601 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4602 used_xadj = pcbddc->mat_graph->xadj; 4603 used_adjncy = pcbddc->mat_graph->adjncy; 4604 } else if (pcbddc->computed_rowadj) { 4605 used_xadj = pcbddc->mat_graph->xadj; 4606 used_adjncy = pcbddc->mat_graph->adjncy; 4607 } else { 4608 PetscBool flg_row=PETSC_FALSE; 4609 const PetscInt *xadj,*adjncy; 4610 PetscInt nvtxs; 4611 4612 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4613 if (flg_row) { 4614 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4615 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4616 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4617 free_used_adj = PETSC_TRUE; 4618 } else { 4619 pcbddc->sub_schurs_layers = -1; 4620 used_xadj = NULL; 4621 used_adjncy = NULL; 4622 } 4623 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4624 } 4625 } 4626 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4627 4628 /* free adjacency */ 4629 if (free_used_adj) { 4630 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4631 } 4632 PetscFunctionReturn(0); 4633 } 4634 4635 #undef __FUNCT__ 4636 #define __FUNCT__ "PCBDDCInitSubSchurs" 4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4638 { 4639 PC_IS *pcis=(PC_IS*)pc->data; 4640 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4641 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4642 PCBDDCGraph graph; 4643 Mat S_j; 4644 PetscErrorCode ierr; 4645 4646 PetscFunctionBegin; 4647 /* attach interface graph for determining subsets */ 4648 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4649 IS verticesIS; 4650 4651 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4655 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4656 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4657 /* 4658 if (pcbddc->dbg_flag) { 4659 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 } 4661 */ 4662 } else { 4663 graph = pcbddc->mat_graph; 4664 } 4665 4666 /* Create Schur complement matrix */ 4667 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4668 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4669 4670 /* sub_schurs init */ 4671 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4672 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4673 /* free graph struct */ 4674 if (pcbddc->sub_schurs_rebuild) { 4675 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4676 } 4677 PetscFunctionReturn(0); 4678 } 4679