1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 /* max size of subsets */ 36 mss = 0; 37 for (i=0;i<sub_schurs->n_subs;i++) { 38 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 39 PetscInt subset_size; 40 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 41 mss = PetscMax(mss,subset_size); 42 } 43 } 44 45 /* min/max and threshold */ 46 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 47 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 48 nmax = PetscMax(nmin,nmax); 49 if (pcbddc->adaptive_threshold > 1.0) { 50 thresh = 1.0/pcbddc->adaptive_threshold; 51 } else { 52 thresh = 1.0; 53 } 54 55 /* allocate lapack workspace */ 56 cum = cum2 = 0; 57 maxneigs = 0; 58 for (i=0;i<sub_schurs->n_subs;i++) { 59 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 60 PetscInt n,subset_size; 61 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 62 n = PetscMin(subset_size,nmax); 63 cum += subset_size*n; 64 cum2 += n; 65 maxneigs = PetscMax(maxneigs,n); 66 } 67 } 68 69 if (mss) { 70 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 71 PetscBLASInt B_itype = 1; 72 PetscBLASInt B_N = mss; 73 PetscScalar zero = 0.0; 74 PetscScalar eps = 0.0; /* dlamch? */ 75 76 B_lwork = -1; 77 S = NULL; 78 St = NULL; 79 eigs = NULL; 80 eigv = NULL; 81 B_iwork = NULL; 82 B_ifail = NULL; 83 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 84 #if defined(PETSC_USE_COMPLEX) 85 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 86 #else 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 88 #endif 89 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 90 ierr = PetscFPTrapPop();CHKERRQ(ierr); 91 } else { 92 /* TODO */ 93 } 94 } else { 95 lwork = 0; 96 } 97 98 nv = 0; 99 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 100 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 101 } 102 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 103 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 104 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 105 #if defined(PETSC_USE_COMPLEX) 106 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 107 #endif 108 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 109 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 110 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 111 nv+cum,&pcbddc->adaptive_constraints_idxs, 112 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 113 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 114 115 maxneigs = 0; 116 cum = cum2 = cumarray = 0; 117 if (sub_schurs->is_Ej_com) { 118 const PetscInt *idxs; 119 120 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 121 for (cum=0;cum<nv;cum++) { 122 pcbddc->adaptive_constraints_n[cum] = 1; 123 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 124 pcbddc->adaptive_constraints_ptrs[cum] = cum; 125 pcbddc->adaptive_constraints_data[cum] = 1.0; 126 } 127 cum2 = cum; 128 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 } 130 131 if (mss) { /* multilevel */ 132 if (pcbddc->use_deluxe_scaling) { 133 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 134 } else { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 136 } 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 138 } 139 140 for (i=0;i<sub_schurs->n_subs;i++) { 141 PetscInt j,subset_size; 142 143 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 144 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 145 const PetscInt *idxs; 146 PetscScalar one = 1.0,zero=0.0; 147 PetscBLASInt B_N; 148 149 /* S should be copied since we need it for deluxe scaling */ 150 if (sub_schurs->is_hermitian) { 151 PetscInt j; 152 for (j=0;j<subset_size;j++) { 153 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 154 } 155 for (j=0;j<subset_size;j++) { 156 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 157 } 158 } else { 159 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 160 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 161 } 162 /* is always this the right matrix? */ 163 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 164 165 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 166 /* St = Starray+cumarray; */ 167 168 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 169 PetscBLASInt B_itype = 1; 170 PetscBLASInt B_IL = 1, B_IU; 171 PetscScalar eps = -1.0; /* dlamch? */ 172 PetscInt nmin_s; 173 174 /* ask for eigenvalues lower than thresh */ 175 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 176 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 177 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 178 #if defined(PETSC_USE_COMPLEX) 179 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 180 #else 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 182 #endif 183 ierr = PetscFPTrapPop();CHKERRQ(ierr); 184 if (B_ierr) { 185 if (B_ierr < 0 ) { 186 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 187 } else if (B_ierr <= B_N) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 189 } else { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 191 } 192 } 193 194 if (B_neigs > nmax) { 195 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 196 B_neigs = nmax; 197 } 198 199 nmin_s = PetscMin(nmin,B_N); 200 if (B_neigs < nmin_s) { 201 PetscBLASInt B_neigs2; 202 203 B_IL = B_neigs + 1; 204 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 205 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 206 if (sub_schurs->is_hermitian) { 207 PetscInt j; 208 for (j=0;j<subset_size;j++) { 209 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 210 } 211 for (j=0;j<subset_size;j++) { 212 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 213 } 214 } else { 215 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 216 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 217 } 218 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 219 #if defined(PETSC_USE_COMPLEX) 220 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 221 #else 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 223 #endif 224 ierr = PetscFPTrapPop();CHKERRQ(ierr); 225 B_neigs += B_neigs2; 226 } 227 if (B_ierr) { 228 if (B_ierr < 0 ) { 229 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 230 } else if (B_ierr <= B_N) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 232 } else { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 234 } 235 } 236 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 237 for (j=0;j<B_neigs;j++) { 238 if (eigs[j] == 0.0) { 239 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 240 } else { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 242 } 243 } 244 } else { 245 /* TODO */ 246 } 247 maxneigs = PetscMax(B_neigs,maxneigs); 248 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 249 250 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 251 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 252 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 253 #if 0 254 PetscInt ii; 255 for (ii=0;ii<B_neigs;ii++) { 256 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 257 for (j=0;j<B_N;j++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 259 } 260 } 261 #endif 262 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 263 for (j=0;j<B_neigs;j++) { 264 #if 0 265 { 266 PetscBLASInt Blas_N,Blas_one = 1.0; 267 PetscScalar norm; 268 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 269 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 270 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 271 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 272 } else { 273 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } 275 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 276 } 277 #endif 278 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 279 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 280 cum2 += subset_size; 281 } 282 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 283 } 284 /* shift for next computation */ 285 cumarray += subset_size*subset_size; 286 } 287 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 288 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 289 290 if (mss) { 291 if (pcbddc->use_deluxe_scaling) { 292 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 293 } else { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 295 } 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 297 } 298 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 299 #if defined(PETSC_USE_COMPLEX) 300 ierr = PetscFree(rwork);CHKERRQ(ierr); 301 #endif 302 if (pcbddc->dbg_flag) { 303 PetscInt maxneigs_r; 304 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 305 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 306 } 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "PCBDDCSetUpSolvers" 312 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 313 { 314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 315 PetscScalar *coarse_submat_vals; 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 /* Setup local scatters R_to_B and (optionally) R_to_D */ 320 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 321 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 322 323 /* Setup local neumann solver ksp_R */ 324 /* PCBDDCSetUpLocalScatters should be called first! */ 325 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 326 327 /* Change global null space passed in by the user if change of basis has been requested */ 328 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 329 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 330 } 331 332 /* 333 Setup local correction and local part of coarse basis. 334 Gives back the dense local part of the coarse matrix in column major ordering 335 */ 336 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 337 338 /* Compute total number of coarse nodes and setup coarse solver */ 339 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 340 341 /* free */ 342 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "PCBDDCResetCustomization" 348 PetscErrorCode PCBDDCResetCustomization(PC pc) 349 { 350 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 355 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 356 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 358 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 360 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 361 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 362 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 363 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 364 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "PCBDDCResetTopography" 370 PetscErrorCode PCBDDCResetTopography(PC pc) 371 { 372 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 377 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 378 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 379 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 380 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 #undef __FUNCT__ 385 #define __FUNCT__ "PCBDDCResetSolvers" 386 PetscErrorCode PCBDDCResetSolvers(PC pc) 387 { 388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 389 PetscScalar *array; 390 PetscErrorCode ierr; 391 392 PetscFunctionBegin; 393 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 394 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 395 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 396 ierr = PetscFree(array);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 398 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 401 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 402 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 403 if (pcbddc->local_auxmat2) { 404 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 405 ierr = PetscFree(array);CHKERRQ(ierr); 406 } 407 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 408 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 409 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 410 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 411 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 412 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 413 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 415 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 416 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 418 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 419 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 420 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 421 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 422 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 428 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 429 { 430 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 431 PC_IS *pcis = (PC_IS*)pc->data; 432 VecType impVecType; 433 PetscInt n_constraints,n_R,old_size; 434 PetscErrorCode ierr; 435 436 PetscFunctionBegin; 437 if (!pcbddc->ConstraintMatrix) { 438 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 439 } 440 /* get sizes */ 441 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 442 n_R = pcis->n-pcbddc->n_actual_vertices; 443 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 444 /* local work vectors (try to avoid unneeded work)*/ 445 /* R nodes */ 446 old_size = -1; 447 if (pcbddc->vec1_R) { 448 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 449 } 450 if (n_R != old_size) { 451 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 452 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 453 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 455 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 456 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 457 } 458 /* local primal dofs */ 459 old_size = -1; 460 if (pcbddc->vec1_P) { 461 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 462 } 463 if (pcbddc->local_primal_size != old_size) { 464 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 465 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 466 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 467 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 468 } 469 /* local explicit constraints */ 470 old_size = -1; 471 if (pcbddc->vec1_C) { 472 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 473 } 474 if (n_constraints && n_constraints != old_size) { 475 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 476 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 477 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 478 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "PCBDDCSetUpCorrection" 485 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 486 { 487 PetscErrorCode ierr; 488 /* pointers to pcis and pcbddc */ 489 PC_IS* pcis = (PC_IS*)pc->data; 490 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 491 /* submatrices of local problem */ 492 Mat A_RV,A_VR,A_VV; 493 /* submatrices of local coarse problem */ 494 Mat S_VV,S_CV,S_VC,S_CC; 495 /* working matrices */ 496 Mat C_CR; 497 /* additional working stuff */ 498 PC pc_R; 499 Mat F; 500 PetscBool isLU,isCHOL,isILU; 501 502 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 503 PetscScalar *work; 504 PetscInt *idx_V_B; 505 PetscInt n,n_vertices,n_constraints; 506 PetscInt i,n_R,n_D,n_B; 507 PetscBool unsymmetric_check; 508 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 509 MatType impMatType; 510 /* some shortcuts to scalars */ 511 PetscScalar one=1.0,m_one=-1.0; 512 513 PetscFunctionBegin; 514 /* get number of vertices (corners plus constraints with change of basis) 515 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 516 n_vertices = pcbddc->n_actual_vertices; 517 n_constraints = pcbddc->local_primal_size-n_vertices; 518 /* Set Non-overlapping dimensions */ 519 n_B = pcis->n_B; n_D = pcis->n - n_B; 520 n_R = pcis->n-n_vertices; 521 522 /* Set types for local objects needed by BDDC precondtioner */ 523 impMatType = MATSEQDENSE; 524 525 /* vertices in boundary numbering */ 526 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 527 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 528 if (i != n_vertices) { 529 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 530 } 531 532 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 533 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 534 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 535 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 543 unsymmetric_check = PETSC_FALSE; 544 /* allocate workspace */ 545 n = 0; 546 if (n_constraints) { 547 n += n_R*n_constraints; 548 } 549 if (n_vertices) { 550 n = PetscMax(2*n_R*n_vertices,n); 551 } 552 if (!pcbddc->issym) { 553 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 554 unsymmetric_check = PETSC_TRUE; 555 } 556 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 557 558 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 559 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 560 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 561 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 563 if (isLU || isILU || isCHOL) { 564 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 565 } else { 566 F = NULL; 567 } 568 569 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 570 if (n_constraints) { 571 Mat M1,M2,M3; 572 IS is_aux; 573 /* see if we can save some allocations */ 574 if (pcbddc->local_auxmat2) { 575 PetscInt on_R,on_constraints; 576 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 577 if (on_R != n_R || on_constraints != n_constraints) { 578 PetscScalar *marray; 579 580 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 581 ierr = PetscFree(marray);CHKERRQ(ierr); 582 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 583 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 584 } 585 } 586 /* auxiliary matrices */ 587 if (!pcbddc->local_auxmat2) { 588 PetscScalar *marray; 589 590 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 591 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 592 marray += n_R*n_constraints; 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 594 } 595 596 /* Extract constraints on R nodes: C_{CR} */ 597 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 598 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 599 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 600 601 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 602 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 603 for (i=0;i<n_constraints;i++) { 604 const PetscScalar *row_cmat_values; 605 const PetscInt *row_cmat_indices; 606 PetscInt size_of_constraint,j; 607 608 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 609 for (j=0;j<size_of_constraint;j++) { 610 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 611 } 612 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 613 } 614 if (F) { 615 Mat B; 616 617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 618 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 619 ierr = MatDestroy(&B);CHKERRQ(ierr); 620 } else { 621 PetscScalar *xarray; 622 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 623 for (i=0;i<n_constraints;i++) { 624 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 625 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 626 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 627 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 628 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 629 } 630 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 631 } 632 633 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 634 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 635 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 636 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 637 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 638 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 639 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 640 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 641 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 642 ierr = MatDestroy(&M2);CHKERRQ(ierr); 643 ierr = MatDestroy(&M3);CHKERRQ(ierr); 644 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 645 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 646 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 647 ierr = MatDestroy(&M1);CHKERRQ(ierr); 648 } 649 /* Get submatrices from subdomain matrix */ 650 if (n_vertices) { 651 Mat newmat; 652 IS is_aux; 653 PetscInt ibs,mbs; 654 PetscBool issbaij; 655 656 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 657 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 658 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 659 if (ibs != mbs) { /* need to convert to SEQAIJ */ 660 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 661 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 662 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 664 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 665 } else { 666 /* this is safe */ 667 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 668 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 669 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 670 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 671 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 672 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 673 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 674 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 675 } else { 676 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 677 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 678 } 679 } 680 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 681 } 682 683 /* Matrix of coarse basis functions (local) */ 684 if (pcbddc->coarse_phi_B) { 685 PetscInt on_B,on_primal,on_D=n_D; 686 if (pcbddc->coarse_phi_D) { 687 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 688 } 689 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 690 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 691 PetscScalar *marray; 692 693 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 694 ierr = PetscFree(marray);CHKERRQ(ierr); 695 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 699 } 700 } 701 702 if (!pcbddc->coarse_phi_B) { 703 PetscScalar *marray; 704 705 n = n_B*pcbddc->local_primal_size; 706 if (pcbddc->switch_static || pcbddc->dbg_flag) { 707 n += n_D*pcbddc->local_primal_size; 708 } 709 if (!pcbddc->issym) { 710 n *= 2; 711 } 712 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 713 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 714 n = n_B*pcbddc->local_primal_size; 715 if (pcbddc->switch_static || pcbddc->dbg_flag) { 716 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 717 n += n_D*pcbddc->local_primal_size; 718 } 719 if (!pcbddc->issym) { 720 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 721 if (pcbddc->switch_static || pcbddc->dbg_flag) { 722 n = n_B*pcbddc->local_primal_size; 723 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 724 } 725 } else { 726 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 727 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 728 if (pcbddc->switch_static || pcbddc->dbg_flag) { 729 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 730 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 731 } 732 } 733 } 734 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 735 /* vertices */ 736 if (n_vertices) { 737 738 if (n_R) { 739 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 740 PetscBLASInt B_N,B_one = 1; 741 PetscScalar *x,*y; 742 743 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 745 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 746 if (F) { 747 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 748 } else { 749 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 750 for (i=0;i<n_vertices;i++) { 751 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 752 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 753 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 754 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 756 } 757 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 758 } 759 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 760 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 761 if (n_constraints) { 762 Mat B; 763 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 765 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 766 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 767 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 768 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 769 ierr = MatDestroy(&B);CHKERRQ(ierr); 770 } 771 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 772 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 773 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 774 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 775 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 777 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 778 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 779 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 780 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 782 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 783 } else { 784 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 785 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 786 } 787 /* coarse basis functions */ 788 for (i=0;i<n_vertices;i++) { 789 PetscScalar *y; 790 791 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 792 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 793 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 794 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 y[n_B*i+idx_V_B[i]] = 1.0; 797 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 798 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 799 800 if (pcbddc->switch_static || pcbddc->dbg_flag) { 801 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 802 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 803 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 806 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 807 } 808 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 809 } 810 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 811 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 812 } 813 814 if (n_constraints) { 815 Mat B; 816 817 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 818 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 819 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 821 if (n_vertices) { 822 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 823 } 824 ierr = MatDestroy(&B);CHKERRQ(ierr); 825 /* coarse basis functions */ 826 for (i=0;i<n_constraints;i++) { 827 PetscScalar *y; 828 829 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 830 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 831 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 832 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 835 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 836 837 if (pcbddc->switch_static || pcbddc->dbg_flag) { 838 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 839 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 840 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 843 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 844 } 845 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 846 } 847 } 848 849 /* compute other basis functions for non-symmetric problems */ 850 if (!pcbddc->issym) { 851 Mat B,X; 852 853 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 854 855 if (n_constraints) { 856 Mat S_CCT,B_C; 857 858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 859 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 860 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 861 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 862 if (n_vertices) { 863 Mat B_V,S_VCT; 864 865 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 866 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 867 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 868 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 869 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 870 } 871 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 872 } 873 if (n_vertices && n_R) { 874 Mat A_VRT; 875 PetscBLASInt B_N,B_one = 1; 876 877 if (!n_constraints) { /* if there are no constraints, reset work */ 878 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 879 } 880 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 881 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 882 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 883 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 884 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 885 } 886 887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 888 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 889 for (i=0;i<pcbddc->local_primal_size;i++) { 890 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 891 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 892 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 893 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 894 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 895 } 896 } else { 897 for (i=0;i<pcbddc->local_primal_size;i++) { 898 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 899 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 900 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 901 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 902 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 903 } 904 } 905 ierr = MatDestroy(&B);CHKERRQ(ierr); 906 /* coarse basis functions */ 907 for (i=0;i<pcbddc->local_primal_size;i++) { 908 PetscScalar *y; 909 910 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 911 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 912 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 913 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 if (i<n_vertices) { 916 y[n_B*i+idx_V_B[i]] = 1.0; 917 } 918 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 919 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 920 921 if (pcbddc->switch_static || pcbddc->dbg_flag) { 922 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 923 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 924 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 927 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 928 } 929 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 930 } 931 ierr = MatDestroy(&X);CHKERRQ(ierr); 932 } 933 934 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 935 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 936 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 939 /* Checking coarse_sub_mat and coarse basis functios */ 940 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 941 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 942 if (pcbddc->dbg_flag) { 943 Mat coarse_sub_mat; 944 Mat AUXMAT,TM1,TM2,TM3,TM4; 945 Mat coarse_phi_D,coarse_phi_B; 946 Mat coarse_psi_D,coarse_psi_B; 947 Mat A_II,A_BB,A_IB,A_BI; 948 MatType checkmattype=MATSEQAIJ; 949 PetscReal real_value; 950 951 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 952 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 953 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 955 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 956 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 957 if (unsymmetric_check) { 958 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 959 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 960 } 961 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 962 963 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 965 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 966 if (unsymmetric_check) { 967 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 968 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 969 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 970 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 971 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 972 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 973 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 974 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 975 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 976 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 977 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 978 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 979 } else { 980 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 981 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 982 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 983 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 984 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 985 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 986 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 987 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 988 } 989 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 990 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 991 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 993 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 994 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 995 996 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1000 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1003 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1004 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1007 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1008 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1009 if (unsymmetric_check) { 1010 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1011 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1012 } 1013 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1014 } 1015 1016 /* free memory */ 1017 ierr = PetscFree(work);CHKERRQ(ierr); 1018 if (n_vertices) { 1019 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1020 } 1021 if (n_constraints) { 1022 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1023 } 1024 /* get back data */ 1025 *coarse_submat_vals_n = coarse_submat_vals; 1026 PetscFunctionReturn(0); 1027 } 1028 1029 #undef __FUNCT__ 1030 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1031 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1032 { 1033 Mat *work_mat; 1034 IS isrow_s,iscol_s; 1035 PetscBool rsorted,csorted; 1036 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1037 PetscErrorCode ierr; 1038 1039 PetscFunctionBegin; 1040 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1041 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1042 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1043 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1044 1045 if (!rsorted) { 1046 const PetscInt *idxs; 1047 PetscInt *idxs_sorted,i; 1048 1049 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1050 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1051 for (i=0;i<rsize;i++) { 1052 idxs_perm_r[i] = i; 1053 } 1054 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1055 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1056 for (i=0;i<rsize;i++) { 1057 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1058 } 1059 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1060 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1061 } else { 1062 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1063 isrow_s = isrow; 1064 } 1065 1066 if (!csorted) { 1067 if (isrow == iscol) { 1068 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1069 iscol_s = isrow_s; 1070 } else { 1071 const PetscInt *idxs; 1072 PetscInt *idxs_sorted,i; 1073 1074 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1076 for (i=0;i<csize;i++) { 1077 idxs_perm_c[i] = i; 1078 } 1079 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1080 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1081 for (i=0;i<csize;i++) { 1082 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1083 } 1084 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1085 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1086 } 1087 } else { 1088 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1089 iscol_s = iscol; 1090 } 1091 1092 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1093 1094 if (!rsorted || !csorted) { 1095 Mat new_mat; 1096 IS is_perm_r,is_perm_c; 1097 1098 if (!rsorted) { 1099 PetscInt *idxs_r,i; 1100 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1101 for (i=0;i<rsize;i++) { 1102 idxs_r[idxs_perm_r[i]] = i; 1103 } 1104 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1105 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1106 } else { 1107 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1108 } 1109 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1110 1111 if (!csorted) { 1112 if (isrow_s == iscol_s) { 1113 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1114 is_perm_c = is_perm_r; 1115 } else { 1116 PetscInt *idxs_c,i; 1117 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1118 for (i=0;i<csize;i++) { 1119 idxs_c[idxs_perm_c[i]] = i; 1120 } 1121 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1122 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1123 } 1124 } else { 1125 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1126 } 1127 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1128 1129 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1130 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1131 work_mat[0] = new_mat; 1132 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1133 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1134 } 1135 1136 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1137 *B = work_mat[0]; 1138 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1139 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1140 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1146 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1147 { 1148 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1149 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1150 Mat new_mat; 1151 IS is_local,is_global; 1152 PetscInt local_size; 1153 PetscBool isseqaij; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1158 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1159 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1160 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1161 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1162 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1163 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1164 1165 /* check */ 1166 if (pcbddc->dbg_flag) { 1167 Vec x,x_change; 1168 PetscReal error; 1169 1170 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1171 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1172 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1173 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1174 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1176 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1177 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1179 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1182 ierr = VecDestroy(&x);CHKERRQ(ierr); 1183 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1184 } 1185 1186 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1187 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1188 if (isseqaij) { 1189 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1190 } else { 1191 Mat work_mat; 1192 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1193 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1194 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1195 } 1196 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1197 /* 1198 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1199 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1200 */ 1201 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1207 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1208 { 1209 PC_IS* pcis = (PC_IS*)(pc->data); 1210 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1211 IS is_aux1,is_aux2; 1212 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1213 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1214 PetscInt vbs,bs; 1215 PetscBT bitmask; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 /* 1220 No need to setup local scatters if 1221 - primal space is unchanged 1222 AND 1223 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1224 AND 1225 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1226 */ 1227 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1228 PetscFunctionReturn(0); 1229 } 1230 /* destroy old objects */ 1231 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1232 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1233 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1234 /* Set Non-overlapping dimensions */ 1235 n_B = pcis->n_B; n_D = pcis->n - n_B; 1236 n_vertices = pcbddc->n_actual_vertices; 1237 /* create auxiliary bitmask */ 1238 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1239 for (i=0;i<n_vertices;i++) { 1240 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1241 } 1242 1243 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1244 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1245 for (i=0, n_R=0; i<pcis->n; i++) { 1246 if (!PetscBTLookup(bitmask,i)) { 1247 idx_R_local[n_R] = i; 1248 n_R++; 1249 } 1250 } 1251 1252 /* Block code */ 1253 vbs = 1; 1254 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1255 if (bs>1 && !(n_vertices%bs)) { 1256 PetscBool is_blocked = PETSC_TRUE; 1257 PetscInt *vary; 1258 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1259 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1260 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1261 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1262 for (i=0; i<n_vertices; i++) { 1263 if (vary[i]!=0 && vary[i]!=bs) { 1264 is_blocked = PETSC_FALSE; 1265 break; 1266 } 1267 } 1268 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1269 vbs = bs; 1270 for (i=0;i<n_R/vbs;i++) { 1271 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1272 } 1273 } 1274 ierr = PetscFree(vary);CHKERRQ(ierr); 1275 } 1276 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1277 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1278 1279 /* print some info if requested */ 1280 if (pcbddc->dbg_flag) { 1281 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1283 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1289 } 1290 1291 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1292 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1293 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1295 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1296 for (i=0; i<n_D; i++) { 1297 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1298 } 1299 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1300 for (i=0, j=0; i<n_R; i++) { 1301 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1302 aux_array1[j++] = i; 1303 } 1304 } 1305 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1306 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1307 for (i=0, j=0; i<n_B; i++) { 1308 if (!PetscBTLookup(bitmask,is_indices[i])) { 1309 aux_array2[j++] = i; 1310 } 1311 } 1312 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1313 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1314 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1315 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1317 1318 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1319 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1320 for (i=0, j=0; i<n_R; i++) { 1321 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1322 aux_array1[j++] = i; 1323 } 1324 } 1325 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1326 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1327 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1328 } 1329 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1330 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1337 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1338 { 1339 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1340 PC_IS *pcis = (PC_IS*)pc->data; 1341 PC pc_temp; 1342 Mat A_RR; 1343 MatReuse reuse; 1344 PetscScalar m_one = -1.0; 1345 PetscReal value; 1346 PetscInt n_D,n_R,ibs,mbs; 1347 PetscBool use_exact,use_exact_reduced,issbaij; 1348 PetscErrorCode ierr; 1349 /* prefixes stuff */ 1350 char dir_prefix[256],neu_prefix[256],str_level[16]; 1351 size_t len; 1352 1353 PetscFunctionBegin; 1354 1355 /* compute prefixes */ 1356 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1357 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1358 if (!pcbddc->current_level) { 1359 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1360 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1361 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1362 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1363 } else { 1364 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1365 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1366 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1367 len -= 15; /* remove "pc_bddc_coarse_" */ 1368 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1369 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1370 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1371 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1372 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1373 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1374 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1375 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1376 } 1377 1378 /* DIRICHLET PROBLEM */ 1379 if (dirichlet) { 1380 /* Matrix for Dirichlet problem is pcis->A_II */ 1381 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1382 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1383 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1384 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1385 /* default */ 1386 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1387 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1388 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1389 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1390 if (issbaij) { 1391 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1392 } else { 1393 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1394 } 1395 /* Allow user's customization */ 1396 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1397 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1398 } 1399 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1400 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1401 if (!n_D) { 1402 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1403 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1404 } 1405 /* Set Up KSP for Dirichlet problem of BDDC */ 1406 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1407 /* set ksp_D into pcis data */ 1408 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1409 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1410 pcis->ksp_D = pcbddc->ksp_D; 1411 } 1412 1413 /* NEUMANN PROBLEM */ 1414 A_RR = 0; 1415 if (neumann) { 1416 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1417 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1418 if (pcbddc->ksp_R) { /* already created ksp */ 1419 PetscInt nn_R; 1420 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1421 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1422 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1423 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1424 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1425 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1426 reuse = MAT_INITIAL_MATRIX; 1427 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1428 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1429 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1430 reuse = MAT_INITIAL_MATRIX; 1431 } else { /* safe to reuse the matrix */ 1432 reuse = MAT_REUSE_MATRIX; 1433 } 1434 } 1435 /* last check */ 1436 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1437 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1438 reuse = MAT_INITIAL_MATRIX; 1439 } 1440 } else { /* first time, so we need to create the matrix */ 1441 reuse = MAT_INITIAL_MATRIX; 1442 } 1443 /* extract A_RR */ 1444 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1445 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1446 if (ibs != mbs) { 1447 Mat newmat; 1448 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1449 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1450 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1451 } else { 1452 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1453 } 1454 if (!pcbddc->ksp_R) { /* create object if not present */ 1455 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1456 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1457 /* default */ 1458 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1459 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1460 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1462 if (issbaij) { 1463 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1464 } else { 1465 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1466 } 1467 /* Allow user's customization */ 1468 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1469 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1470 } 1471 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1472 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1473 if (!n_R) { 1474 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1475 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1476 } 1477 /* Set Up KSP for Neumann problem of BDDC */ 1478 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1479 } 1480 1481 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1482 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1483 if (pcbddc->dbg_flag) { 1484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1485 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1486 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1487 } 1488 if (dirichlet) { /* Dirichlet */ 1489 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1490 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1491 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1492 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1494 /* need to be adapted? */ 1495 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1496 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1497 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1498 /* print info */ 1499 if (pcbddc->dbg_flag) { 1500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1502 } 1503 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1504 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1505 } 1506 } 1507 if (neumann) { /* Neumann */ 1508 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1509 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1510 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1511 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1513 /* need to be adapted? */ 1514 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1515 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1516 /* print info */ 1517 if (pcbddc->dbg_flag) { 1518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1520 } 1521 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1522 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1523 } 1524 } 1525 } 1526 /* free Neumann problem's matrix */ 1527 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1533 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1534 { 1535 PetscErrorCode ierr; 1536 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1537 1538 PetscFunctionBegin; 1539 if (applytranspose) { 1540 if (pcbddc->local_auxmat1) { 1541 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1542 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1543 } 1544 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1545 } else { 1546 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1547 if (pcbddc->local_auxmat1) { 1548 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1549 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1550 } 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1558 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1559 { 1560 PetscErrorCode ierr; 1561 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1562 PC_IS* pcis = (PC_IS*) (pc->data); 1563 const PetscScalar zero = 0.0; 1564 1565 PetscFunctionBegin; 1566 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1567 if (applytranspose) { 1568 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1569 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1570 } else { 1571 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1572 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1573 } 1574 /* start communications from local primal nodes to rhs of coarse solver */ 1575 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1576 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1577 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 1579 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1580 /* TODO remove null space when doing multilevel */ 1581 if (pcbddc->coarse_ksp) { 1582 if (applytranspose) { 1583 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1584 } else { 1585 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1586 } 1587 } 1588 1589 /* Local solution on R nodes */ 1590 if (pcis->n) { 1591 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1592 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1593 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1594 if (pcbddc->switch_static) { 1595 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1596 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1597 } 1598 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1599 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1600 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 if (pcbddc->switch_static) { 1603 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 } 1606 } 1607 1608 /* communications from coarse sol to local primal nodes */ 1609 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1610 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1611 1612 /* Sum contributions from two levels */ 1613 if (applytranspose) { 1614 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1615 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1616 } else { 1617 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1618 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1619 } 1620 PetscFunctionReturn(0); 1621 } 1622 1623 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1626 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1627 { 1628 PetscErrorCode ierr; 1629 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1630 PetscScalar *array,*array2; 1631 Vec from,to; 1632 1633 PetscFunctionBegin; 1634 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1635 from = pcbddc->coarse_vec; 1636 to = pcbddc->vec1_P; 1637 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1638 Vec tvec; 1639 PetscInt lsize; 1640 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1641 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1642 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1643 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1644 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1645 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1646 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1647 } 1648 } else { /* from local to global -> put data in coarse right hand side */ 1649 from = pcbddc->vec1_P; 1650 to = pcbddc->coarse_vec; 1651 } 1652 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1658 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1659 { 1660 PetscErrorCode ierr; 1661 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1662 PetscScalar *array,*array2; 1663 Vec from,to; 1664 1665 PetscFunctionBegin; 1666 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1667 from = pcbddc->coarse_vec; 1668 to = pcbddc->vec1_P; 1669 } else { /* from local to global -> put data in coarse right hand side */ 1670 from = pcbddc->vec1_P; 1671 to = pcbddc->coarse_vec; 1672 } 1673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1674 if (smode == SCATTER_FORWARD) { 1675 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1676 Vec tvec; 1677 PetscInt lsize; 1678 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1679 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1680 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1681 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1682 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1683 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1684 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1685 } 1686 } 1687 PetscFunctionReturn(0); 1688 } 1689 1690 /* uncomment for testing purposes */ 1691 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1692 #undef __FUNCT__ 1693 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1694 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1695 { 1696 PetscErrorCode ierr; 1697 PC_IS* pcis = (PC_IS*)(pc->data); 1698 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1699 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1700 /* one and zero */ 1701 PetscScalar one=1.0,zero=0.0; 1702 /* space to store constraints and their local indices */ 1703 PetscScalar *temp_quadrature_constraint; 1704 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1705 /* iterators */ 1706 PetscInt i,j,k,total_counts,temp_start_ptr; 1707 /* stuff to store connected components stored in pcbddc->mat_graph */ 1708 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1709 PetscInt n_ISForFaces,n_ISForEdges; 1710 /* near null space stuff */ 1711 MatNullSpace nearnullsp; 1712 const Vec *nearnullvecs; 1713 Vec *localnearnullsp; 1714 PetscBool nnsp_has_cnst; 1715 PetscInt nnsp_size; 1716 PetscScalar *array; 1717 /* BLAS integers */ 1718 PetscBLASInt lwork,lierr; 1719 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1720 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1721 /* LAPACK working arrays for SVD or POD */ 1722 PetscBool skip_lapack; 1723 PetscScalar *work; 1724 PetscReal *singular_vals; 1725 #if defined(PETSC_USE_COMPLEX) 1726 PetscReal *rwork; 1727 #endif 1728 #if defined(PETSC_MISSING_LAPACK_GESVD) 1729 PetscBLASInt Blas_one_2=1; 1730 PetscScalar *temp_basis,*correlation_mat; 1731 #else 1732 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1733 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1734 #endif 1735 /* reuse */ 1736 PetscInt olocal_primal_size; 1737 PetscInt *oprimal_indices_local_idxs; 1738 /* change of basis */ 1739 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1740 PetscBool boolforchange,qr_needed; 1741 PetscBT touched,change_basis,qr_needed_idx; 1742 /* auxiliary stuff */ 1743 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1744 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1745 PetscScalar *temp_quadrature_constraint_work=NULL; 1746 /* some quantities */ 1747 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1748 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1749 1750 1751 PetscFunctionBegin; 1752 /* Destroy Mat objects computed previously */ 1753 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1754 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1755 /* TODO synch with adaptive selection */ 1756 /* Get index sets for faces, edges and vertices from graph */ 1757 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1758 /* free unneeded index sets */ 1759 if (!pcbddc->use_vertices) { 1760 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1761 } 1762 if (!pcbddc->use_edges) { 1763 for (i=0;i<n_ISForEdges;i++) { 1764 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1767 n_ISForEdges = 0; 1768 } 1769 if (!pcbddc->use_faces) { 1770 for (i=0;i<n_ISForFaces;i++) { 1771 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1772 } 1773 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1774 n_ISForFaces = 0; 1775 } 1776 /* HACKS (the following two blocks of code) */ 1777 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1778 pcbddc->use_change_of_basis = PETSC_TRUE; 1779 if (!ISForEdges) { 1780 pcbddc->use_change_on_faces = PETSC_TRUE; 1781 } 1782 } 1783 if (pcbddc->NullSpace) { 1784 /* use_change_of_basis should be consistent among processors */ 1785 PetscBool tbool[2],gbool[2]; 1786 tbool [0] = pcbddc->use_change_of_basis; 1787 tbool [1] = pcbddc->use_change_on_faces; 1788 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1789 pcbddc->use_change_of_basis = gbool[0]; 1790 pcbddc->use_change_on_faces = gbool[1]; 1791 } 1792 /* print some info */ 1793 if (pcbddc->dbg_flag) { 1794 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1796 i = 0; 1797 if (ISForVertices) { 1798 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1799 } 1800 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1803 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1804 } 1805 1806 if (!pcbddc->adaptive_selection) { 1807 /* check if near null space is attached to global mat */ 1808 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1809 if (nearnullsp) { 1810 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1811 /* remove any stored info */ 1812 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1813 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1814 /* store information for BDDC solver reuse */ 1815 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1816 pcbddc->onearnullspace = nearnullsp; 1817 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1818 for (i=0;i<nnsp_size;i++) { 1819 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1820 } 1821 } else { /* if near null space is not provided BDDC uses constants by default */ 1822 nnsp_size = 0; 1823 nnsp_has_cnst = PETSC_TRUE; 1824 } 1825 /* get max number of constraints on a single cc */ 1826 max_constraints = nnsp_size; 1827 if (nnsp_has_cnst) max_constraints++; 1828 1829 /* 1830 Evaluate maximum storage size needed by the procedure 1831 - temp_indices will contain start index of each constraint stored as follows 1832 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1833 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1834 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1835 */ 1836 total_counts = n_ISForFaces+n_ISForEdges; 1837 total_counts *= max_constraints; 1838 n_vertices = 0; 1839 if (ISForVertices) { 1840 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1841 } 1842 total_counts += n_vertices; 1843 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1844 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1845 total_counts = 0; 1846 max_size_of_constraint = 0; 1847 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1848 if (i<n_ISForEdges) { 1849 used_IS = &ISForEdges[i]; 1850 } else { 1851 used_IS = &ISForFaces[i-n_ISForEdges]; 1852 } 1853 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1854 total_counts += j; 1855 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1856 } 1857 total_counts *= max_constraints; 1858 total_counts += n_vertices; 1859 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1860 /* get local part of global near null space vectors */ 1861 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1862 for (k=0;k<nnsp_size;k++) { 1863 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1864 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1865 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 } 1867 1868 /* whether or not to skip lapack calls */ 1869 skip_lapack = PETSC_TRUE; 1870 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1871 1872 /* allocate some auxiliary stuff */ 1873 if (!skip_lapack || pcbddc->use_qr_single) { 1874 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1875 } 1876 1877 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1878 if (!skip_lapack) { 1879 PetscScalar temp_work; 1880 1881 #if defined(PETSC_MISSING_LAPACK_GESVD) 1882 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1883 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1884 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1885 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1886 #if defined(PETSC_USE_COMPLEX) 1887 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1888 #endif 1889 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1890 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1891 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1892 lwork = -1; 1893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1894 #if !defined(PETSC_USE_COMPLEX) 1895 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1896 #else 1897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1898 #endif 1899 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1900 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1901 #else /* on missing GESVD */ 1902 /* SVD */ 1903 PetscInt max_n,min_n; 1904 max_n = max_size_of_constraint; 1905 min_n = max_constraints; 1906 if (max_size_of_constraint < max_constraints) { 1907 min_n = max_size_of_constraint; 1908 max_n = max_constraints; 1909 } 1910 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1911 #if defined(PETSC_USE_COMPLEX) 1912 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1913 #endif 1914 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1915 lwork = -1; 1916 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1917 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1918 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1919 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1920 #if !defined(PETSC_USE_COMPLEX) 1921 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1922 #else 1923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1924 #endif 1925 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1926 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1927 #endif /* on missing GESVD */ 1928 /* Allocate optimal workspace */ 1929 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1930 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1931 } 1932 /* Now we can loop on constraining sets */ 1933 total_counts = 0; 1934 temp_indices[0] = 0; 1935 /* vertices */ 1936 if (ISForVertices) { 1937 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1938 if (nnsp_has_cnst) { /* consider all vertices */ 1939 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1940 for (i=0;i<n_vertices;i++) { 1941 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1942 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1943 total_counts++; 1944 } 1945 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1946 PetscBool used_vertex; 1947 for (i=0;i<n_vertices;i++) { 1948 used_vertex = PETSC_FALSE; 1949 k = 0; 1950 while (!used_vertex && k<nnsp_size) { 1951 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1952 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1953 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1954 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1955 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1956 total_counts++; 1957 used_vertex = PETSC_TRUE; 1958 } 1959 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1960 k++; 1961 } 1962 } 1963 } 1964 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1965 n_vertices = total_counts; 1966 } 1967 1968 /* edges and faces */ 1969 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1970 if (ncc<n_ISForEdges) { 1971 used_IS = &ISForEdges[ncc]; 1972 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1973 } else { 1974 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1975 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1976 } 1977 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1978 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1979 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1980 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1981 /* change of basis should not be performed on local periodic nodes */ 1982 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1983 if (nnsp_has_cnst) { 1984 PetscScalar quad_value; 1985 temp_constraints++; 1986 if (!pcbddc->use_nnsp_true) { 1987 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1988 } else { 1989 quad_value = 1.0; 1990 } 1991 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (j=0;j<size_of_constraint;j++) { 1993 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1994 } 1995 /* sort by global ordering if using lapack subroutines (not needed!) */ 1996 if (!skip_lapack || pcbddc->use_qr_single) { 1997 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1998 for (j=0;j<size_of_constraint;j++) { 1999 permutation[j]=j; 2000 } 2001 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2002 for (j=0;j<size_of_constraint;j++) { 2003 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2004 } 2005 for (j=0;j<size_of_constraint;j++) { 2006 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2007 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2008 } 2009 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2010 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2011 } 2012 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2013 total_counts++; 2014 } 2015 for (k=0;k<nnsp_size;k++) { 2016 PetscReal real_value; 2017 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2018 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2019 for (j=0;j<size_of_constraint;j++) { 2020 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2021 } 2022 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2023 /* check if array is null on the connected component */ 2024 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2025 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2026 if (real_value > 0.0) { /* keep indices and values */ 2027 /* sort by global ordering if using lapack subroutines */ 2028 if (!skip_lapack || pcbddc->use_qr_single) { 2029 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2030 for (j=0;j<size_of_constraint;j++) { 2031 permutation[j]=j; 2032 } 2033 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2034 for (j=0;j<size_of_constraint;j++) { 2035 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2036 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2037 } 2038 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2039 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2040 } 2041 temp_constraints++; 2042 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2043 total_counts++; 2044 } 2045 } 2046 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2047 valid_constraints = temp_constraints; 2048 if (!pcbddc->use_nnsp_true && temp_constraints) { 2049 if (temp_constraints == 1) { /* just normalize the constraint */ 2050 PetscScalar norm; 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2052 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2053 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2054 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2055 } else { /* perform SVD */ 2056 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2057 2058 #if defined(PETSC_MISSING_LAPACK_GESVD) 2059 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2060 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2061 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2062 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2063 from that computed using LAPACKgesvd 2064 -> This is due to a different computation of eigenvectors in LAPACKheev 2065 -> The quality of the POD-computed basis will be the same */ 2066 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2067 /* Store upper triangular part of correlation matrix */ 2068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2069 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2070 for (j=0;j<temp_constraints;j++) { 2071 for (k=0;k<j+1;k++) { 2072 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2073 } 2074 } 2075 /* compute eigenvalues and eigenvectors of correlation matrix */ 2076 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2077 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2078 #if !defined(PETSC_USE_COMPLEX) 2079 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2080 #else 2081 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2082 #endif 2083 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2084 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2085 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2086 j = 0; 2087 while (j < temp_constraints && singular_vals[j] < tol) j++; 2088 total_counts = total_counts-j; 2089 valid_constraints = temp_constraints-j; 2090 /* scale and copy POD basis into used quadrature memory */ 2091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2092 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2097 if (j<temp_constraints) { 2098 PetscInt ii; 2099 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2100 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2101 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2103 for (k=0;k<temp_constraints-j;k++) { 2104 for (ii=0;ii<size_of_constraint;ii++) { 2105 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2106 } 2107 } 2108 } 2109 #else /* on missing GESVD */ 2110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2111 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2113 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2114 #if !defined(PETSC_USE_COMPLEX) 2115 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2116 #else 2117 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2118 #endif 2119 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2120 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2121 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2122 k = temp_constraints; 2123 if (k > size_of_constraint) k = size_of_constraint; 2124 j = 0; 2125 while (j < k && singular_vals[k-j-1] < tol) j++; 2126 valid_constraints = k-j; 2127 total_counts = total_counts-temp_constraints+valid_constraints; 2128 #endif /* on missing GESVD */ 2129 } 2130 } 2131 /* setting change_of_basis flag is safe now */ 2132 if (boolforchange) { 2133 for (j=0;j<valid_constraints;j++) { 2134 PetscBTSet(change_basis,total_counts-j-1); 2135 } 2136 } 2137 } 2138 /* free workspace */ 2139 if (!skip_lapack || pcbddc->use_qr_single) { 2140 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2141 } 2142 if (!skip_lapack) { 2143 ierr = PetscFree(work);CHKERRQ(ierr); 2144 #if defined(PETSC_USE_COMPLEX) 2145 ierr = PetscFree(rwork);CHKERRQ(ierr); 2146 #endif 2147 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2148 #if defined(PETSC_MISSING_LAPACK_GESVD) 2149 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2150 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2151 #endif 2152 } 2153 for (k=0;k<nnsp_size;k++) { 2154 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2155 } 2156 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2157 } else { 2158 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2159 PetscInt cum = 0; 2160 2161 total_counts = 0; 2162 n_vertices = 0; 2163 if (sub_schurs->is_Ej_com) { 2164 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2165 } 2166 max_constraints = 0; 2167 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2168 total_counts += pcbddc->adaptive_constraints_n[i]; 2169 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2170 } 2171 temp_indices = pcbddc->adaptive_constraints_ptrs; 2172 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2173 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2174 2175 #if 0 2176 printf("Found %d totals\n",total_counts); 2177 for (i=0;i<total_counts;i++) { 2178 printf("const %d, start %d",i,temp_indices[i]); 2179 printf(" end %d:\n",temp_indices[i+1]); 2180 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2181 printf(" idxs %d",temp_indices_to_constraint[j]); 2182 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2183 } 2184 } 2185 for (i=0;i<n_vertices;i++) { 2186 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2187 } 2188 for (i=0;i<sub_schurs->n_subs;i++) { 2189 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2190 } 2191 #endif 2192 2193 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2194 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2195 /* Change of basis */ 2196 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2197 if (pcbddc->use_change_of_basis) { 2198 cum = n_vertices; 2199 for (i=0;i<sub_schurs->n_subs;i++) { 2200 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2201 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2202 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2203 } 2204 } 2205 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2206 } 2207 } 2208 } 2209 2210 /* free index sets of faces, edges and vertices */ 2211 for (i=0;i<n_ISForFaces;i++) { 2212 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2213 } 2214 if (n_ISForFaces) { 2215 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2216 } 2217 for (i=0;i<n_ISForEdges;i++) { 2218 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2219 } 2220 if (n_ISForEdges) { 2221 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2222 } 2223 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2224 2225 /* map temp_indices_to_constraint in boundary numbering */ 2226 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2227 if (i != temp_indices[total_counts]) { 2228 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2229 } 2230 2231 /* set quantities in pcbddc data structure and store previous primal size */ 2232 /* n_vertices defines the number of subdomain corners in the primal space */ 2233 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2234 olocal_primal_size = pcbddc->local_primal_size; 2235 pcbddc->local_primal_size = total_counts; 2236 pcbddc->n_vertices = n_vertices; 2237 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2238 2239 /* Create constraint matrix */ 2240 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2241 /* so we need to set it up properly either with or without change of basis */ 2242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2243 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2244 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2245 /* array to compute a local numbering of constraints : vertices first then constraints */ 2246 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2247 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2248 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2249 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2250 /* auxiliary stuff for basis change */ 2251 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2252 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2253 2254 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2255 total_primal_vertices=0; 2256 for (i=0;i<pcbddc->local_primal_size;i++) { 2257 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2258 if (size_of_constraint == 1) { 2259 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2260 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2261 aux_primal_minloc[total_primal_vertices]=0; 2262 total_primal_vertices++; 2263 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2264 PetscInt min_loc,min_index; 2265 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2266 /* find first untouched local node */ 2267 k = 0; 2268 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2269 min_index = global_indices[k]; 2270 min_loc = k; 2271 /* search the minimum among global nodes already untouched on the cc */ 2272 for (k=1;k<size_of_constraint;k++) { 2273 /* there can be more than one constraint on a single connected component */ 2274 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2275 min_index = global_indices[k]; 2276 min_loc = k; 2277 } 2278 } 2279 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2280 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2281 aux_primal_minloc[total_primal_vertices]=min_loc; 2282 total_primal_vertices++; 2283 } 2284 } 2285 /* determine if a QR strategy is needed for change of basis */ 2286 qr_needed = PETSC_FALSE; 2287 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2288 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2289 if (PetscBTLookup(change_basis,i)) { 2290 if (!pcbddc->use_qr_single) { 2291 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2292 j = 0; 2293 for (k=0;k<size_of_constraint;k++) { 2294 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2295 j++; 2296 } 2297 } 2298 /* found more than one primal dof on the cc */ 2299 if (j > 1) { 2300 PetscBTSet(qr_needed_idx,i); 2301 qr_needed = PETSC_TRUE; 2302 } 2303 } else { 2304 PetscBTSet(qr_needed_idx,i); 2305 qr_needed = PETSC_TRUE; 2306 } 2307 } 2308 } 2309 /* free workspace */ 2310 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2311 2312 /* permute indices in order to have a sorted set of vertices */ 2313 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2314 2315 /* nonzero structure of constraint matrix */ 2316 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2317 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2318 j=total_primal_vertices; 2319 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2320 if (!PetscBTLookup(change_basis,i)) { 2321 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2322 j++; 2323 } 2324 } 2325 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2326 ierr = PetscFree(nnz);CHKERRQ(ierr); 2327 /* set values in constraint matrix */ 2328 for (i=0;i<total_primal_vertices;i++) { 2329 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2330 } 2331 total_counts = total_primal_vertices; 2332 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2333 if (!PetscBTLookup(change_basis,i)) { 2334 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2335 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2336 total_counts++; 2337 } 2338 } 2339 /* assembling */ 2340 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 /* 2343 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2344 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2345 */ 2346 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2347 if (pcbddc->use_change_of_basis) { 2348 /* dual and primal dofs on a single cc */ 2349 PetscInt dual_dofs,primal_dofs; 2350 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2351 PetscInt primal_counter; 2352 /* working stuff for GEQRF */ 2353 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2354 PetscBLASInt lqr_work; 2355 /* working stuff for UNGQR */ 2356 PetscScalar *gqr_work,lgqr_work_t; 2357 PetscBLASInt lgqr_work; 2358 /* working stuff for TRTRS */ 2359 PetscScalar *trs_rhs; 2360 PetscBLASInt Blas_NRHS; 2361 /* pointers for values insertion into change of basis matrix */ 2362 PetscInt *start_rows,*start_cols; 2363 PetscScalar *start_vals; 2364 /* working stuff for values insertion */ 2365 PetscBT is_primal; 2366 /* matrix sizes */ 2367 PetscInt global_size,local_size; 2368 /* temporary change of basis */ 2369 Mat localChangeOfBasisMatrix; 2370 2371 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2372 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2373 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2374 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2375 /* nonzeros for local mat */ 2376 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2377 for (i=0;i<pcis->n;i++) nnz[i]=1; 2378 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2379 if (PetscBTLookup(change_basis,i)) { 2380 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2381 if (PetscBTLookup(qr_needed_idx,i)) { 2382 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2383 } else { 2384 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2385 /* get local primal index on the cc */ 2386 j = 0; 2387 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2388 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2389 } 2390 } 2391 } 2392 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2393 ierr = PetscFree(nnz);CHKERRQ(ierr); 2394 /* Set initial identity in the matrix */ 2395 for (i=0;i<pcis->n;i++) { 2396 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2397 } 2398 2399 if (pcbddc->dbg_flag) { 2400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2402 } 2403 2404 2405 /* Now we loop on the constraints which need a change of basis */ 2406 /* 2407 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2408 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2409 2410 Basic blocks of change of basis matrix T computed by 2411 2412 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2413 2414 | 1 0 ... 0 s_1/S | 2415 | 0 1 ... 0 s_2/S | 2416 | ... | 2417 | 0 ... 1 s_{n-1}/S | 2418 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2419 2420 with S = \sum_{i=1}^n s_i^2 2421 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2422 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2423 2424 - QR decomposition of constraints otherwise 2425 */ 2426 if (qr_needed) { 2427 /* space to store Q */ 2428 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2429 /* first we issue queries for optimal work */ 2430 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2431 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2432 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2433 lqr_work = -1; 2434 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2435 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2436 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2437 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2438 lgqr_work = -1; 2439 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2440 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2441 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2443 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2444 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2445 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2446 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2447 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2448 /* array to store scaling factors for reflectors */ 2449 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2450 /* array to store rhs and solution of triangular solver */ 2451 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2452 /* allocating workspace for check */ 2453 if (pcbddc->dbg_flag) { 2454 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2455 } 2456 } 2457 /* array to store whether a node is primal or not */ 2458 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2459 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2460 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2461 if (i != total_primal_vertices) { 2462 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2463 } 2464 for (i=0;i<total_primal_vertices;i++) { 2465 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2466 } 2467 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2468 2469 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2470 /* -> using implicit ordering contained in temp_indices data */ 2471 total_counts = pcbddc->n_vertices; 2472 primal_counter = total_counts; 2473 while (total_counts<pcbddc->local_primal_size) { 2474 primal_dofs = 1; 2475 if (PetscBTLookup(change_basis,total_counts)) { 2476 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2477 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2478 primal_dofs++; 2479 } 2480 /* get constraint info */ 2481 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2482 dual_dofs = size_of_constraint-primal_dofs; 2483 2484 if (pcbddc->dbg_flag) { 2485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2486 } 2487 2488 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2489 2490 /* copy quadrature constraints for change of basis check */ 2491 if (pcbddc->dbg_flag) { 2492 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2493 } 2494 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2495 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2496 2497 /* compute QR decomposition of constraints */ 2498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2502 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2505 2506 /* explictly compute R^-T */ 2507 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2508 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2509 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2510 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2513 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2514 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2515 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2516 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2517 2518 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2521 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2523 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2524 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2525 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2526 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2527 2528 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2529 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2530 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2531 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2532 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2538 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2539 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2540 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2541 2542 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2543 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2544 /* insert cols for primal dofs */ 2545 for (j=0;j<primal_dofs;j++) { 2546 start_vals = &qr_basis[j*size_of_constraint]; 2547 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2548 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2549 } 2550 /* insert cols for dual dofs */ 2551 for (j=0,k=0;j<dual_dofs;k++) { 2552 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2553 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2554 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2555 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2556 j++; 2557 } 2558 } 2559 2560 /* check change of basis */ 2561 if (pcbddc->dbg_flag) { 2562 PetscInt ii,jj; 2563 PetscBool valid_qr=PETSC_TRUE; 2564 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2570 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2571 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2572 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2573 for (jj=0;jj<size_of_constraint;jj++) { 2574 for (ii=0;ii<primal_dofs;ii++) { 2575 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2576 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2577 } 2578 } 2579 if (!valid_qr) { 2580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2581 for (jj=0;jj<size_of_constraint;jj++) { 2582 for (ii=0;ii<primal_dofs;ii++) { 2583 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2584 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2585 } 2586 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2588 } 2589 } 2590 } 2591 } else { 2592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2593 } 2594 } 2595 } else { /* simple transformation block */ 2596 PetscInt row,col; 2597 PetscScalar val,norm; 2598 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2601 for (j=0;j<size_of_constraint;j++) { 2602 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2603 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2604 if (!PetscBTLookup(is_primal,row_B)) { 2605 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2606 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2607 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2608 } else { 2609 for (k=0;k<size_of_constraint;k++) { 2610 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2611 if (row != col) { 2612 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2613 } else { 2614 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2615 } 2616 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2617 } 2618 } 2619 } 2620 if (pcbddc->dbg_flag) { 2621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2622 } 2623 } 2624 /* increment primal counter */ 2625 primal_counter += primal_dofs; 2626 } else { 2627 if (pcbddc->dbg_flag) { 2628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2629 } 2630 } 2631 /* increment constraint counter total_counts */ 2632 total_counts += primal_dofs; 2633 } 2634 2635 /* free workspace */ 2636 if (qr_needed) { 2637 if (pcbddc->dbg_flag) { 2638 ierr = PetscFree(work);CHKERRQ(ierr); 2639 } 2640 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2641 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2642 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2643 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2644 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2645 } 2646 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2647 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2648 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 2650 /* assembling of global change of variable */ 2651 { 2652 Mat tmat; 2653 PetscInt bs; 2654 2655 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2656 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2657 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2658 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2659 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2660 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2661 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2662 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2663 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2664 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2665 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2666 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2667 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2668 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2669 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2670 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2671 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2672 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2673 } 2674 /* check */ 2675 if (pcbddc->dbg_flag) { 2676 PetscReal error; 2677 Vec x,x_change; 2678 2679 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2680 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2681 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2682 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2683 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2684 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2686 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2687 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2688 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2689 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2690 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2692 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2693 ierr = VecDestroy(&x);CHKERRQ(ierr); 2694 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2695 } 2696 2697 /* adapt sub_schurs computed (if any) */ 2698 if (pcbddc->use_deluxe_scaling) { 2699 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2700 if (sub_schurs->n_subs_par_g) { 2701 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2702 } 2703 if (sub_schurs->S_Ej_all) { 2704 Mat S_1,S_2,tmat; 2705 IS is_all_N; 2706 2707 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2708 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2709 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2710 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2711 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2712 sub_schurs->S_Ej_all = S_1; 2713 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2714 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2715 sub_schurs->sum_S_Ej_all = S_2; 2716 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2717 } 2718 } 2719 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2720 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2721 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2722 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2723 } 2724 2725 /* set up change of basis context */ 2726 if (pcbddc->ChangeOfBasisMatrix) { 2727 PCBDDCChange_ctx change_ctx; 2728 2729 if (!pcbddc->new_global_mat) { 2730 PetscInt global_size,local_size; 2731 2732 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2733 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2734 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2735 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2736 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2737 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2738 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2739 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2740 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2741 } else { 2742 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2743 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2744 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2745 } 2746 if (!pcbddc->user_ChangeOfBasisMatrix) { 2747 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2748 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2749 } else { 2750 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2751 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2752 } 2753 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2754 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2755 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2756 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 } 2758 2759 /* get indices in local ordering for vertices and constraints */ 2760 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2761 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2762 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2763 } 2764 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2765 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2766 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2767 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2768 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2769 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2771 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2773 /* set quantities in PCBDDC data struct */ 2774 pcbddc->n_actual_vertices = i; 2775 /* check if a new primal space has been introduced */ 2776 pcbddc->new_primal_space_local = PETSC_TRUE; 2777 if (olocal_primal_size == pcbddc->local_primal_size) { 2778 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2779 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2780 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2781 } 2782 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2783 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2784 2785 /* flush dbg viewer */ 2786 if (pcbddc->dbg_flag) { 2787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2788 } 2789 2790 /* free workspace */ 2791 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2792 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2793 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2794 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2795 if (!pcbddc->adaptive_selection) { 2796 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2797 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2798 } else { 2799 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2800 pcbddc->adaptive_constraints_ptrs, 2801 pcbddc->adaptive_constraints_idxs, 2802 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2803 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2804 } 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2810 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2811 { 2812 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2813 PC_IS *pcis = (PC_IS*)pc->data; 2814 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2815 PetscInt ierr,i,vertex_size; 2816 PetscViewer viewer=pcbddc->dbg_viewer; 2817 2818 PetscFunctionBegin; 2819 /* Reset previously computed graph */ 2820 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2821 /* Init local Graph struct */ 2822 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2823 2824 /* Check validity of the csr graph passed in by the user */ 2825 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2826 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2827 } 2828 2829 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2830 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2831 PetscInt *xadj,*adjncy; 2832 PetscInt nvtxs; 2833 2834 if (pcbddc->use_local_adj) { 2835 PetscBool flg_row=PETSC_FALSE; 2836 2837 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2838 if (flg_row) { 2839 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2840 pcbddc->computed_rowadj = PETSC_TRUE; 2841 } 2842 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2843 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2844 IS is_dummy; 2845 ISLocalToGlobalMapping l2gmap_dummy; 2846 PetscInt j,sum; 2847 PetscInt *cxadj,*cadjncy; 2848 const PetscInt *idxs; 2849 PCBDDCGraph graph; 2850 PetscBT is_on_boundary; 2851 2852 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2853 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2854 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2855 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2856 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2857 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2858 graph->xadj = xadj; 2859 graph->adjncy = adjncy; 2860 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2861 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2862 2863 if (pcbddc->dbg_flag) { 2864 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2865 for (i=0;i<graph->ncc;i++) { 2866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2867 } 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 } 2870 2871 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2872 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2873 for (i=0;i<pcis->n_B;i++) { 2874 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2875 } 2876 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2877 2878 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2879 sum = 0; 2880 for (i=0;i<graph->ncc;i++) { 2881 PetscInt sizecc = 0; 2882 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2883 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2884 sizecc++; 2885 } 2886 } 2887 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2888 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2889 cxadj[graph->queue[j]] = sizecc; 2890 } 2891 } 2892 sum += sizecc*sizecc; 2893 } 2894 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2895 sum = 0; 2896 for (i=0;i<nvtxs;i++) { 2897 PetscInt temp = cxadj[i]; 2898 cxadj[i] = sum; 2899 sum += temp; 2900 } 2901 cxadj[nvtxs] = sum; 2902 for (i=0;i<graph->ncc;i++) { 2903 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2904 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2905 PetscInt k,sizecc = 0; 2906 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2907 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2908 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2909 sizecc++; 2910 } 2911 } 2912 } 2913 } 2914 } 2915 if (nvtxs) { 2916 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2917 } else { 2918 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2919 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2920 } 2921 graph->xadj = 0; 2922 graph->adjncy = 0; 2923 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2924 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2925 } 2926 } 2927 2928 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2929 vertex_size = 1; 2930 if (pcbddc->user_provided_isfordofs) { 2931 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2932 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2933 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2934 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2935 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2936 } 2937 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2938 pcbddc->n_ISForDofs = 0; 2939 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2940 } 2941 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2942 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2943 } else { 2944 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2945 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2948 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2949 } 2950 } 2951 } 2952 2953 /* Setup of Graph */ 2954 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2955 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2956 } 2957 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2958 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2959 } 2960 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2961 2962 /* Graph's connected components analysis */ 2963 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2964 2965 /* print some info to stdout */ 2966 if (pcbddc->dbg_flag) { 2967 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2968 } 2969 2970 /* mark topography has done */ 2971 pcbddc->recompute_topography = PETSC_FALSE; 2972 PetscFunctionReturn(0); 2973 } 2974 2975 #undef __FUNCT__ 2976 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2977 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2978 { 2979 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2980 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2981 PetscErrorCode ierr; 2982 2983 PetscFunctionBegin; 2984 n = 0; 2985 vertices = 0; 2986 if (pcbddc->ConstraintMatrix) { 2987 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2988 for (i=0;i<local_primal_size;i++) { 2989 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2990 if (size_of_constraint == 1) n++; 2991 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2992 } 2993 if (vertices_idx) { 2994 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2995 n = 0; 2996 for (i=0;i<local_primal_size;i++) { 2997 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2998 if (size_of_constraint == 1) { 2999 vertices[n++]=row_cmat_indices[0]; 3000 } 3001 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3002 } 3003 } 3004 } 3005 *n_vertices = n; 3006 if (vertices_idx) *vertices_idx = vertices; 3007 PetscFunctionReturn(0); 3008 } 3009 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3012 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3013 { 3014 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3015 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3016 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3017 PetscBT touched; 3018 PetscErrorCode ierr; 3019 3020 /* This function assumes that the number of local constraints per connected component 3021 is not greater than the number of nodes defined for the connected component 3022 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3023 PetscFunctionBegin; 3024 n = 0; 3025 constraints_index = 0; 3026 if (pcbddc->ConstraintMatrix) { 3027 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3028 max_size_of_constraint = 0; 3029 for (i=0;i<local_primal_size;i++) { 3030 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3031 if (size_of_constraint > 1) { 3032 n++; 3033 } 3034 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3035 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 } 3037 if (constraints_idx) { 3038 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3039 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3040 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3041 n = 0; 3042 for (i=0;i<local_primal_size;i++) { 3043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 if (size_of_constraint > 1) { 3045 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3046 /* find first untouched local node */ 3047 j = 0; 3048 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3049 min_index = row_cmat_global_indices[j]; 3050 min_loc = j; 3051 /* search the minimum among nodes not yet touched on the connected component 3052 since there can be more than one constraint on a single cc */ 3053 for (j=1;j<size_of_constraint;j++) { 3054 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3055 min_index = row_cmat_global_indices[j]; 3056 min_loc = j; 3057 } 3058 } 3059 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3060 constraints_index[n++] = row_cmat_indices[min_loc]; 3061 } 3062 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3063 } 3064 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3065 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3066 } 3067 } 3068 *n_constraints = n; 3069 if (constraints_idx) *constraints_idx = constraints_index; 3070 PetscFunctionReturn(0); 3071 } 3072 3073 #undef __FUNCT__ 3074 #define __FUNCT__ "PCBDDCSubsetNumbering" 3075 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3076 { 3077 Vec local_vec,global_vec; 3078 IS seqis,paris; 3079 VecScatter scatter_ctx; 3080 PetscScalar *array; 3081 PetscInt *temp_global_dofs; 3082 PetscScalar globalsum; 3083 PetscInt i,j,s; 3084 PetscInt nlocals,first_index,old_index,max_local; 3085 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3086 PetscMPIInt *dof_sizes,*dof_displs; 3087 PetscBool first_found; 3088 PetscErrorCode ierr; 3089 3090 PetscFunctionBegin; 3091 /* mpi buffers */ 3092 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3093 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3094 j = ( !rank_prec_comm ? size_prec_comm : 0); 3095 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3096 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3097 /* get maximum size of subset */ 3098 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3099 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3100 max_local = 0; 3101 for (i=0;i<n_local_dofs;i++) { 3102 if (max_local < temp_global_dofs[i] ) { 3103 max_local = temp_global_dofs[i]; 3104 } 3105 } 3106 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3107 max_global++; 3108 max_local = 0; 3109 for (i=0;i<n_local_dofs;i++) { 3110 if (max_local < local_dofs[i] ) { 3111 max_local = local_dofs[i]; 3112 } 3113 } 3114 max_local++; 3115 /* allocate workspace */ 3116 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3117 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3118 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3119 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3120 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3121 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3122 /* create scatter */ 3123 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3124 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3125 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3126 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3127 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3128 /* init array */ 3129 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3130 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3131 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3132 if (local_dofs_mult) { 3133 for (i=0;i<n_local_dofs;i++) { 3134 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3135 } 3136 } else { 3137 for (i=0;i<n_local_dofs;i++) { 3138 array[local_dofs[i]]=1.0; 3139 } 3140 } 3141 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3142 /* scatter into global vec and get total number of global dofs */ 3143 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3144 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3145 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3146 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3147 /* Fill global_vec with cumulative function for global numbering */ 3148 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3149 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3150 nlocals = 0; 3151 first_index = -1; 3152 first_found = PETSC_FALSE; 3153 for (i=0;i<s;i++) { 3154 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3155 first_found = PETSC_TRUE; 3156 first_index = i; 3157 } 3158 nlocals += (PetscInt)PetscRealPart(array[i]); 3159 } 3160 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3161 if (!rank_prec_comm) { 3162 dof_displs[0]=0; 3163 for (i=1;i<size_prec_comm;i++) { 3164 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3165 } 3166 } 3167 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3168 if (first_found) { 3169 array[first_index] += (PetscScalar)nlocals; 3170 old_index = first_index; 3171 for (i=first_index+1;i<s;i++) { 3172 if (PetscRealPart(array[i]) > 0.1) { 3173 array[i] += array[old_index]; 3174 old_index = i; 3175 } 3176 } 3177 } 3178 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3179 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3180 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3181 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3182 /* get global ordering of local dofs */ 3183 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3184 if (local_dofs_mult) { 3185 for (i=0;i<n_local_dofs;i++) { 3186 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3187 } 3188 } else { 3189 for (i=0;i<n_local_dofs;i++) { 3190 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3191 } 3192 } 3193 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3194 /* free workspace */ 3195 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3196 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3197 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3198 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3199 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3200 /* return pointer to global ordering of local dofs */ 3201 *global_numbering_subset = temp_global_dofs; 3202 PetscFunctionReturn(0); 3203 } 3204 3205 #undef __FUNCT__ 3206 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3207 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3208 { 3209 PetscInt i,j; 3210 PetscScalar *alphas; 3211 PetscErrorCode ierr; 3212 3213 PetscFunctionBegin; 3214 /* this implements stabilized Gram-Schmidt */ 3215 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3216 for (i=0;i<n;i++) { 3217 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3218 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3219 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3220 } 3221 ierr = PetscFree(alphas);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatISGetSubassemblingPattern" 3227 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3228 { 3229 Mat subdomain_adj; 3230 IS new_ranks,ranks_send_to; 3231 MatPartitioning partitioner; 3232 Mat_IS *matis; 3233 PetscInt n_neighs,*neighs,*n_shared,**shared; 3234 PetscInt prank; 3235 PetscMPIInt size,rank,color; 3236 PetscInt *xadj,*adjncy,*oldranks; 3237 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3238 PetscInt i,local_size,threshold=0; 3239 PetscErrorCode ierr; 3240 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3241 PetscSubcomm subcomm; 3242 3243 PetscFunctionBegin; 3244 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3245 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3246 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3247 3248 /* Get info on mapping */ 3249 matis = (Mat_IS*)(mat->data); 3250 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3251 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3252 3253 /* build local CSR graph of subdomains' connectivity */ 3254 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3255 xadj[0] = 0; 3256 xadj[1] = PetscMax(n_neighs-1,0); 3257 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3258 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3259 3260 if (threshold) { 3261 PetscInt xadj_count = 0; 3262 for (i=1;i<n_neighs;i++) { 3263 if (n_shared[i] > threshold) { 3264 adjncy[xadj_count] = neighs[i]; 3265 adjncy_wgt[xadj_count] = n_shared[i]; 3266 xadj_count++; 3267 } 3268 } 3269 xadj[1] = xadj_count; 3270 } else { 3271 if (xadj[1]) { 3272 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3273 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3274 } 3275 } 3276 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3277 if (use_square) { 3278 for (i=0;i<xadj[1];i++) { 3279 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3280 } 3281 } 3282 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3283 3284 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3285 3286 /* 3287 Restrict work on active processes only. 3288 */ 3289 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3290 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3291 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3292 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3293 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3294 if (color) { 3295 ierr = PetscFree(xadj);CHKERRQ(ierr); 3296 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3297 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3298 } else { 3299 PetscInt coarsening_ratio; 3300 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3301 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3302 prank = rank; 3303 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3304 /* 3305 for (i=0;i<size;i++) { 3306 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3307 } 3308 */ 3309 for (i=0;i<xadj[1];i++) { 3310 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3311 } 3312 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3313 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3314 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3315 3316 /* Partition */ 3317 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3318 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3319 if (use_vwgt) { 3320 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3321 v_wgt[0] = local_size; 3322 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3323 } 3324 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3325 coarsening_ratio = size/n_subdomains; 3326 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3327 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3328 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3329 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3330 3331 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3332 if (contiguous) { 3333 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3334 } else { 3335 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3336 } 3337 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3338 /* clean up */ 3339 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3340 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3341 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3342 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3343 } 3344 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3345 3346 /* assemble parallel IS for sends */ 3347 i = 1; 3348 if (color) i=0; 3349 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3350 3351 /* get back IS */ 3352 *is_sends = ranks_send_to; 3353 PetscFunctionReturn(0); 3354 } 3355 3356 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3357 3358 #undef __FUNCT__ 3359 #define __FUNCT__ "MatISSubassemble" 3360 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3361 { 3362 Mat local_mat; 3363 Mat_IS *matis; 3364 IS is_sends_internal; 3365 PetscInt rows,cols,new_local_rows; 3366 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3367 PetscBool ismatis,isdense,newisdense,destroy_mat; 3368 ISLocalToGlobalMapping l2gmap; 3369 PetscInt* l2gmap_indices; 3370 const PetscInt* is_indices; 3371 MatType new_local_type; 3372 /* buffers */ 3373 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3374 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3375 PetscInt *recv_buffer_idxs_local; 3376 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3377 /* MPI */ 3378 MPI_Comm comm,comm_n; 3379 PetscSubcomm subcomm; 3380 PetscMPIInt n_sends,n_recvs,commsize; 3381 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3382 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3383 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3384 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3385 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3386 PetscErrorCode ierr; 3387 3388 PetscFunctionBegin; 3389 /* TODO: add missing checks */ 3390 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3391 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3392 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3393 PetscValidLogicalCollectiveInt(mat,nis,7); 3394 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3395 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3396 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3397 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3398 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3399 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3400 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3401 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3402 PetscInt mrows,mcols,mnrows,mncols; 3403 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3404 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3405 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3406 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3407 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3408 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3409 } 3410 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3411 PetscValidLogicalCollectiveInt(mat,bs,0); 3412 /* prepare IS for sending if not provided */ 3413 if (!is_sends) { 3414 PetscBool pcontig = PETSC_TRUE; 3415 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3416 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3417 } else { 3418 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3419 is_sends_internal = is_sends; 3420 } 3421 3422 /* get pointer of MATIS data */ 3423 matis = (Mat_IS*)mat->data; 3424 3425 /* get comm */ 3426 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3427 3428 /* compute number of sends */ 3429 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3430 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3431 3432 /* compute number of receives */ 3433 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3434 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3435 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3436 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3437 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3438 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3439 ierr = PetscFree(iflags);CHKERRQ(ierr); 3440 3441 /* restrict comm if requested */ 3442 subcomm = 0; 3443 destroy_mat = PETSC_FALSE; 3444 if (restrict_comm) { 3445 PetscMPIInt color,subcommsize; 3446 3447 color = 0; 3448 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3449 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3450 subcommsize = commsize - subcommsize; 3451 /* check if reuse has been requested */ 3452 if (reuse == MAT_REUSE_MATRIX) { 3453 if (*mat_n) { 3454 PetscMPIInt subcommsize2; 3455 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3456 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3457 comm_n = PetscObjectComm((PetscObject)*mat_n); 3458 } else { 3459 comm_n = PETSC_COMM_SELF; 3460 } 3461 } else { /* MAT_INITIAL_MATRIX */ 3462 PetscMPIInt rank; 3463 3464 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3465 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3466 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3467 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3468 comm_n = subcomm->comm; 3469 } 3470 /* flag to destroy *mat_n if not significative */ 3471 if (color) destroy_mat = PETSC_TRUE; 3472 } else { 3473 comm_n = comm; 3474 } 3475 3476 /* prepare send/receive buffers */ 3477 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3478 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3479 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3480 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3481 if (nis) { 3482 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3483 } 3484 3485 /* Get data from local matrices */ 3486 if (!isdense) { 3487 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3488 /* TODO: See below some guidelines on how to prepare the local buffers */ 3489 /* 3490 send_buffer_vals should contain the raw values of the local matrix 3491 send_buffer_idxs should contain: 3492 - MatType_PRIVATE type 3493 - PetscInt size_of_l2gmap 3494 - PetscInt global_row_indices[size_of_l2gmap] 3495 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3496 */ 3497 } else { 3498 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3499 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3500 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3501 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3502 send_buffer_idxs[1] = i; 3503 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3504 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3505 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3506 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3507 for (i=0;i<n_sends;i++) { 3508 ilengths_vals[is_indices[i]] = len*len; 3509 ilengths_idxs[is_indices[i]] = len+2; 3510 } 3511 } 3512 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3513 /* additional is (if any) */ 3514 if (nis) { 3515 PetscMPIInt psum; 3516 PetscInt j; 3517 for (j=0,psum=0;j<nis;j++) { 3518 PetscInt plen; 3519 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3520 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3521 psum += len+1; /* indices + lenght */ 3522 } 3523 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3524 for (j=0,psum=0;j<nis;j++) { 3525 PetscInt plen; 3526 const PetscInt *is_array_idxs; 3527 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3528 send_buffer_idxs_is[psum] = plen; 3529 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3530 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3531 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3532 psum += plen+1; /* indices + lenght */ 3533 } 3534 for (i=0;i<n_sends;i++) { 3535 ilengths_idxs_is[is_indices[i]] = psum; 3536 } 3537 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3538 } 3539 3540 buf_size_idxs = 0; 3541 buf_size_vals = 0; 3542 buf_size_idxs_is = 0; 3543 for (i=0;i<n_recvs;i++) { 3544 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3545 buf_size_vals += (PetscInt)olengths_vals[i]; 3546 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3547 } 3548 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3549 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3550 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3551 3552 /* get new tags for clean communications */ 3553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3554 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3555 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3556 3557 /* allocate for requests */ 3558 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3559 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3563 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3564 3565 /* communications */ 3566 ptr_idxs = recv_buffer_idxs; 3567 ptr_vals = recv_buffer_vals; 3568 ptr_idxs_is = recv_buffer_idxs_is; 3569 for (i=0;i<n_recvs;i++) { 3570 source_dest = onodes[i]; 3571 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3572 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3573 ptr_idxs += olengths_idxs[i]; 3574 ptr_vals += olengths_vals[i]; 3575 if (nis) { 3576 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3577 ptr_idxs_is += olengths_idxs_is[i]; 3578 } 3579 } 3580 for (i=0;i<n_sends;i++) { 3581 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3582 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3583 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3584 if (nis) { 3585 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3586 } 3587 } 3588 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3589 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3590 3591 /* assemble new l2g map */ 3592 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3593 ptr_idxs = recv_buffer_idxs; 3594 new_local_rows = 0; 3595 for (i=0;i<n_recvs;i++) { 3596 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3597 ptr_idxs += olengths_idxs[i]; 3598 } 3599 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3600 ptr_idxs = recv_buffer_idxs; 3601 new_local_rows = 0; 3602 for (i=0;i<n_recvs;i++) { 3603 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3604 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3605 ptr_idxs += olengths_idxs[i]; 3606 } 3607 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3608 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3609 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3610 3611 /* infer new local matrix type from received local matrices type */ 3612 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3613 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3614 if (n_recvs) { 3615 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3616 ptr_idxs = recv_buffer_idxs; 3617 for (i=0;i<n_recvs;i++) { 3618 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3619 new_local_type_private = MATAIJ_PRIVATE; 3620 break; 3621 } 3622 ptr_idxs += olengths_idxs[i]; 3623 } 3624 switch (new_local_type_private) { 3625 case MATDENSE_PRIVATE: 3626 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3627 new_local_type = MATSEQAIJ; 3628 bs = 1; 3629 } else { /* if I receive only 1 dense matrix */ 3630 new_local_type = MATSEQDENSE; 3631 bs = 1; 3632 } 3633 break; 3634 case MATAIJ_PRIVATE: 3635 new_local_type = MATSEQAIJ; 3636 bs = 1; 3637 break; 3638 case MATBAIJ_PRIVATE: 3639 new_local_type = MATSEQBAIJ; 3640 break; 3641 case MATSBAIJ_PRIVATE: 3642 new_local_type = MATSEQSBAIJ; 3643 break; 3644 default: 3645 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3646 break; 3647 } 3648 } else { /* by default, new_local_type is seqdense */ 3649 new_local_type = MATSEQDENSE; 3650 bs = 1; 3651 } 3652 3653 /* create MATIS object if needed */ 3654 if (reuse == MAT_INITIAL_MATRIX) { 3655 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3656 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3657 } else { 3658 /* it also destroys the local matrices */ 3659 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3660 } 3661 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3662 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3663 3664 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3665 3666 /* Global to local map of received indices */ 3667 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3668 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3670 3671 /* restore attributes -> type of incoming data and its size */ 3672 buf_size_idxs = 0; 3673 for (i=0;i<n_recvs;i++) { 3674 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3675 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3676 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3677 } 3678 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3679 3680 /* set preallocation */ 3681 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3682 if (!newisdense) { 3683 PetscInt *new_local_nnz=0; 3684 3685 ptr_vals = recv_buffer_vals; 3686 ptr_idxs = recv_buffer_idxs_local; 3687 if (n_recvs) { 3688 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3689 } 3690 for (i=0;i<n_recvs;i++) { 3691 PetscInt j; 3692 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3693 for (j=0;j<*(ptr_idxs+1);j++) { 3694 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3695 } 3696 } else { 3697 /* TODO */ 3698 } 3699 ptr_idxs += olengths_idxs[i]; 3700 } 3701 if (new_local_nnz) { 3702 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3703 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3704 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3705 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3706 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3707 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3708 } else { 3709 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3710 } 3711 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3712 } else { 3713 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3714 } 3715 3716 /* set values */ 3717 ptr_vals = recv_buffer_vals; 3718 ptr_idxs = recv_buffer_idxs_local; 3719 for (i=0;i<n_recvs;i++) { 3720 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3721 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3722 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3723 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3724 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3725 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3726 } else { 3727 /* TODO */ 3728 } 3729 ptr_idxs += olengths_idxs[i]; 3730 ptr_vals += olengths_vals[i]; 3731 } 3732 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3734 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3737 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3738 3739 #if 0 3740 if (!restrict_comm) { /* check */ 3741 Vec lvec,rvec; 3742 PetscReal infty_error; 3743 3744 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3745 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3746 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3747 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3748 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3749 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3750 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3751 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3752 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3753 } 3754 #endif 3755 3756 /* assemble new additional is (if any) */ 3757 if (nis) { 3758 PetscInt **temp_idxs,*count_is,j,psum; 3759 3760 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3761 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3762 ptr_idxs = recv_buffer_idxs_is; 3763 psum = 0; 3764 for (i=0;i<n_recvs;i++) { 3765 for (j=0;j<nis;j++) { 3766 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3767 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3768 psum += plen; 3769 ptr_idxs += plen+1; /* shift pointer to received data */ 3770 } 3771 } 3772 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3773 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3774 for (i=1;i<nis;i++) { 3775 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3776 } 3777 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3778 ptr_idxs = recv_buffer_idxs_is; 3779 for (i=0;i<n_recvs;i++) { 3780 for (j=0;j<nis;j++) { 3781 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3782 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3783 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3784 ptr_idxs += plen+1; /* shift pointer to received data */ 3785 } 3786 } 3787 for (i=0;i<nis;i++) { 3788 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3789 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3790 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3791 } 3792 ierr = PetscFree(count_is);CHKERRQ(ierr); 3793 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3794 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3795 } 3796 /* free workspace */ 3797 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3798 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3800 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3801 if (isdense) { 3802 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3803 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3804 } else { 3805 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3806 } 3807 if (nis) { 3808 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3809 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3810 } 3811 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3812 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3813 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3814 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3815 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3816 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3817 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3818 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3819 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3820 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3821 ierr = PetscFree(onodes);CHKERRQ(ierr); 3822 if (nis) { 3823 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3824 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3825 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3826 } 3827 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3828 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3829 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3830 for (i=0;i<nis;i++) { 3831 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3832 } 3833 } 3834 PetscFunctionReturn(0); 3835 } 3836 3837 /* temporary hack into ksp private data structure */ 3838 #include <petsc-private/kspimpl.h> 3839 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3842 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3843 { 3844 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3845 PC_IS *pcis = (PC_IS*)pc->data; 3846 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3847 MatNullSpace CoarseNullSpace=NULL; 3848 ISLocalToGlobalMapping coarse_islg; 3849 IS coarse_is,*isarray; 3850 PetscInt i,im_active=-1,active_procs=-1; 3851 PetscInt nis,nisdofs,nisneu; 3852 PC pc_temp; 3853 PCType coarse_pc_type; 3854 KSPType coarse_ksp_type; 3855 PetscBool multilevel_requested,multilevel_allowed; 3856 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3857 Mat t_coarse_mat_is; 3858 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3859 PetscMPIInt all_procs; 3860 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3861 PetscBool compute_vecs = PETSC_FALSE; 3862 PetscScalar *array; 3863 PetscErrorCode ierr; 3864 3865 PetscFunctionBegin; 3866 /* Assign global numbering to coarse dofs */ 3867 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3868 compute_vecs = PETSC_TRUE; 3869 PetscInt ocoarse_size; 3870 ocoarse_size = pcbddc->coarse_size; 3871 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3872 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3873 /* see if we can avoid some work */ 3874 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3875 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3876 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3877 coarse_reuse = PETSC_FALSE; 3878 } else { /* we can safely reuse already computed coarse matrix */ 3879 coarse_reuse = PETSC_TRUE; 3880 } 3881 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3882 coarse_reuse = PETSC_FALSE; 3883 } 3884 /* reset any subassembling information */ 3885 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3886 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3887 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3888 coarse_reuse = PETSC_TRUE; 3889 } 3890 3891 /* count "active" (i.e. with positive local size) and "void" processes */ 3892 im_active = !!(pcis->n); 3893 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3894 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3895 void_procs = all_procs-active_procs; 3896 csin_type_simple = PETSC_TRUE; 3897 redist = PETSC_FALSE; 3898 if (pcbddc->current_level && void_procs) { 3899 csin_ml = PETSC_TRUE; 3900 ncoarse_ml = void_procs; 3901 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3902 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3903 csin_ds = PETSC_TRUE; 3904 ncoarse_ds = pcbddc->redistribute_coarse; 3905 redist = PETSC_TRUE; 3906 } else { 3907 csin_ds = PETSC_TRUE; 3908 ncoarse_ds = active_procs; 3909 redist = PETSC_TRUE; 3910 } 3911 } else { 3912 csin_ml = PETSC_FALSE; 3913 ncoarse_ml = all_procs; 3914 if (void_procs) { 3915 csin_ds = PETSC_TRUE; 3916 ncoarse_ds = void_procs; 3917 csin_type_simple = PETSC_FALSE; 3918 } else { 3919 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3920 csin_ds = PETSC_TRUE; 3921 ncoarse_ds = pcbddc->redistribute_coarse; 3922 redist = PETSC_TRUE; 3923 } else { 3924 csin_ds = PETSC_FALSE; 3925 ncoarse_ds = all_procs; 3926 } 3927 } 3928 } 3929 3930 /* 3931 test if we can go multilevel: three conditions must be satisfied: 3932 - we have not exceeded the number of levels requested 3933 - we can actually subassemble the active processes 3934 - we can find a suitable number of MPI processes where we can place the subassembled problem 3935 */ 3936 multilevel_allowed = PETSC_FALSE; 3937 multilevel_requested = PETSC_FALSE; 3938 if (pcbddc->current_level < pcbddc->max_levels) { 3939 multilevel_requested = PETSC_TRUE; 3940 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3941 multilevel_allowed = PETSC_FALSE; 3942 } else { 3943 multilevel_allowed = PETSC_TRUE; 3944 } 3945 } 3946 /* determine number of process partecipating to coarse solver */ 3947 if (multilevel_allowed) { 3948 ncoarse = ncoarse_ml; 3949 csin = csin_ml; 3950 } else { 3951 ncoarse = ncoarse_ds; 3952 csin = csin_ds; 3953 } 3954 3955 /* creates temporary l2gmap and IS for coarse indexes */ 3956 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3957 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3958 3959 /* creates temporary MATIS object for coarse matrix */ 3960 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3961 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3962 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3963 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3964 #if 0 3965 { 3966 PetscViewer viewer; 3967 char filename[256]; 3968 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3969 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3970 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3971 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3972 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3973 } 3974 #endif 3975 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3976 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3977 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3978 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3980 3981 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3982 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3983 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3984 const PetscInt *idxs; 3985 ISLocalToGlobalMapping tmap; 3986 3987 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3988 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3989 /* allocate space for temporary storage */ 3990 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3991 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3992 /* allocate for IS array */ 3993 nisdofs = pcbddc->n_ISForDofsLocal; 3994 nisneu = !!pcbddc->NeumannBoundariesLocal; 3995 nis = nisdofs + nisneu; 3996 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3997 /* dofs splitting */ 3998 for (i=0;i<nisdofs;i++) { 3999 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4000 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4001 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4002 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4003 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4004 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4005 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4006 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4007 } 4008 /* neumann boundaries */ 4009 if (pcbddc->NeumannBoundariesLocal) { 4010 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4011 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4012 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4013 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4014 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4015 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4016 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4017 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4018 } 4019 /* free memory */ 4020 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4021 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4022 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4023 } else { 4024 nis = 0; 4025 nisdofs = 0; 4026 nisneu = 0; 4027 isarray = NULL; 4028 } 4029 /* destroy no longer needed map */ 4030 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4031 4032 /* restrict on coarse candidates (if needed) */ 4033 coarse_mat_is = NULL; 4034 if (csin) { 4035 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4036 if (redist) { 4037 PetscMPIInt rank; 4038 PetscInt spc,n_spc_p1,dest[1],destsize; 4039 4040 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4041 spc = active_procs/pcbddc->redistribute_coarse; 4042 n_spc_p1 = active_procs%pcbddc->redistribute_coarse; 4043 if (im_active) { 4044 destsize = 1; 4045 if (rank > n_spc_p1*(spc+1)-1) { 4046 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4047 } else { 4048 dest[0] = rank/(spc+1); 4049 } 4050 } else { 4051 destsize = 0; 4052 } 4053 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4054 } else if (csin_type_simple) { 4055 PetscMPIInt rank; 4056 PetscInt issize,isidx; 4057 4058 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4059 if (im_active) { 4060 issize = 1; 4061 isidx = (PetscInt)rank; 4062 } else { 4063 issize = 0; 4064 isidx = -1; 4065 } 4066 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4067 } else { /* get a suitable subassembling pattern from MATIS code */ 4068 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4069 } 4070 4071 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4072 if (!redist || ncoarse <= void_procs) { 4073 PetscInt ncoarse_cand,tissize,*nisindices; 4074 PetscInt *coarse_candidates; 4075 const PetscInt* tisindices; 4076 4077 /* get coarse candidates' ranks in pc communicator */ 4078 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4079 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4080 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4081 if (!coarse_candidates[i]) { 4082 coarse_candidates[ncoarse_cand++]=i; 4083 } 4084 } 4085 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4086 4087 4088 if (pcbddc->dbg_flag) { 4089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4091 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4093 for (i=0;i<ncoarse_cand;i++) { 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4095 } 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 } 4099 /* shift the pattern on coarse candidates */ 4100 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4101 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4102 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4103 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4104 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4105 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4106 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4107 } 4108 if (pcbddc->dbg_flag) { 4109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4111 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4113 } 4114 } 4115 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4116 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4117 } else { 4118 if (pcbddc->dbg_flag) { 4119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4122 } 4123 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4124 coarse_mat_is = t_coarse_mat_is; 4125 } 4126 4127 /* create local to global scatters for coarse problem */ 4128 if (compute_vecs) { 4129 PetscInt lrows; 4130 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4131 if (coarse_mat_is) { 4132 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4133 } else { 4134 lrows = 0; 4135 } 4136 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4137 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4138 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4139 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4140 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4141 } 4142 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4143 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4144 4145 /* set defaults for coarse KSP and PC */ 4146 if (multilevel_allowed) { 4147 coarse_ksp_type = KSPRICHARDSON; 4148 coarse_pc_type = PCBDDC; 4149 } else { 4150 coarse_ksp_type = KSPPREONLY; 4151 coarse_pc_type = PCREDUNDANT; 4152 } 4153 4154 /* print some info if requested */ 4155 if (pcbddc->dbg_flag) { 4156 if (!multilevel_allowed) { 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4158 if (multilevel_requested) { 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4160 } else if (pcbddc->max_levels) { 4161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4162 } 4163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4164 } 4165 } 4166 4167 /* create the coarse KSP object only once with defaults */ 4168 if (coarse_mat_is) { 4169 MatReuse coarse_mat_reuse; 4170 PetscViewer dbg_viewer = NULL; 4171 if (pcbddc->dbg_flag) { 4172 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4173 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4174 } 4175 if (!pcbddc->coarse_ksp) { 4176 char prefix[256],str_level[16]; 4177 size_t len; 4178 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4179 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4180 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4181 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4182 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4183 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4184 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4185 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4186 /* prefix */ 4187 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4188 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4189 if (!pcbddc->current_level) { 4190 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4191 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4192 } else { 4193 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4194 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4195 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4196 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4197 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4198 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4199 } 4200 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4201 /* allow user customization */ 4202 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4203 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4204 } 4205 4206 /* get some info after set from options */ 4207 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4208 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4211 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4212 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4213 isbddc = PETSC_FALSE; 4214 } 4215 if (isredundant) { 4216 KSP inner_ksp; 4217 PC inner_pc; 4218 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4219 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4220 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4221 } 4222 4223 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4224 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4225 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4226 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4227 if (nisdofs) { 4228 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4229 for (i=0;i<nisdofs;i++) { 4230 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4231 } 4232 } 4233 if (nisneu) { 4234 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4235 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4236 } 4237 4238 /* assemble coarse matrix */ 4239 if (coarse_reuse) { 4240 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4241 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4242 coarse_mat_reuse = MAT_REUSE_MATRIX; 4243 } else { 4244 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4245 } 4246 if (isbddc || isnn) { 4247 if (pcbddc->coarsening_ratio > 1) { 4248 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4249 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4250 if (pcbddc->dbg_flag) { 4251 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4252 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4253 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4254 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4255 } 4256 } 4257 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4258 } else { 4259 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4260 coarse_mat = coarse_mat_is; 4261 } 4262 } else { 4263 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4264 } 4265 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4266 4267 /* propagate symmetry info to coarse matrix */ 4268 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4269 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4270 4271 /* set operators */ 4272 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4273 if (pcbddc->dbg_flag) { 4274 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4275 } 4276 } else { /* processes non partecipating to coarse solver (if any) */ 4277 coarse_mat = 0; 4278 } 4279 ierr = PetscFree(isarray);CHKERRQ(ierr); 4280 #if 0 4281 { 4282 PetscViewer viewer; 4283 char filename[256]; 4284 sprintf(filename,"coarse_mat.m"); 4285 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4286 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4287 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4288 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4289 } 4290 #endif 4291 4292 /* Compute coarse null space (special handling by BDDC only) */ 4293 if (pcbddc->NullSpace) { 4294 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4295 } 4296 4297 if (pcbddc->coarse_ksp) { 4298 Vec crhs,csol; 4299 PetscBool ispreonly; 4300 if (CoarseNullSpace) { 4301 if (isbddc) { 4302 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4303 } else { 4304 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4305 } 4306 } 4307 /* setup coarse ksp */ 4308 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4309 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4310 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4311 /* hack */ 4312 if (!csol) { 4313 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4314 } 4315 if (!crhs) { 4316 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4317 } 4318 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4319 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4320 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4321 KSP check_ksp; 4322 KSPType check_ksp_type; 4323 PC check_pc; 4324 Vec check_vec,coarse_vec; 4325 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4326 PetscInt its; 4327 PetscBool compute_eigs; 4328 PetscReal *eigs_r,*eigs_c; 4329 PetscInt neigs; 4330 const char *prefix; 4331 4332 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4333 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4334 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4335 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4336 if (ispreonly) { 4337 check_ksp_type = KSPPREONLY; 4338 compute_eigs = PETSC_FALSE; 4339 } else { 4340 check_ksp_type = KSPGMRES; 4341 compute_eigs = PETSC_TRUE; 4342 } 4343 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4344 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4345 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4346 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4347 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4348 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4349 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4350 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4351 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4352 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4353 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4354 /* create random vec */ 4355 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4356 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4357 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4358 if (CoarseNullSpace) { 4359 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4360 } 4361 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4362 /* solve coarse problem */ 4363 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4364 if (CoarseNullSpace) { 4365 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4366 } 4367 /* set eigenvalue estimation if preonly has not been requested */ 4368 if (compute_eigs) { 4369 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4370 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4371 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4372 lambda_max = eigs_r[neigs-1]; 4373 lambda_min = eigs_r[0]; 4374 if (pcbddc->use_coarse_estimates) { 4375 if (lambda_max>lambda_min) { 4376 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4377 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4378 } 4379 } 4380 } 4381 4382 /* check coarse problem residual error */ 4383 if (pcbddc->dbg_flag) { 4384 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4385 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4386 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4387 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4388 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4389 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4390 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4391 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4392 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4393 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4394 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4395 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4396 if (compute_eigs) { 4397 PetscReal lambda_max_s,lambda_min_s; 4398 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4399 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4400 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4402 for (i=0;i<neigs;i++) { 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4404 } 4405 } 4406 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4407 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4408 } 4409 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4410 if (compute_eigs) { 4411 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4412 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4413 } 4414 } 4415 } 4416 /* print additional info */ 4417 if (pcbddc->dbg_flag) { 4418 /* waits until all processes reaches this point */ 4419 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4420 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4422 } 4423 4424 /* free memory */ 4425 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4426 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4427 PetscFunctionReturn(0); 4428 } 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4432 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4433 { 4434 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4435 PC_IS* pcis = (PC_IS*)pc->data; 4436 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4437 PetscInt i,coarse_size; 4438 PetscInt *local_primal_indices; 4439 PetscErrorCode ierr; 4440 4441 PetscFunctionBegin; 4442 /* Compute global number of coarse dofs */ 4443 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4444 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4445 } 4446 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4447 4448 /* check numbering */ 4449 if (pcbddc->dbg_flag) { 4450 PetscScalar coarsesum,*array; 4451 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4452 4453 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4456 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4457 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4458 for (i=0;i<pcbddc->local_primal_size;i++) { 4459 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4460 } 4461 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4462 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4463 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4464 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4465 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4466 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4467 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4468 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4469 for (i=0;i<pcis->n;i++) { 4470 if (array[i] == 1.0) { 4471 set_error = PETSC_TRUE; 4472 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4473 } 4474 } 4475 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4476 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4477 for (i=0;i<pcis->n;i++) { 4478 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4479 } 4480 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4481 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4482 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4483 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4484 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4486 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4488 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4489 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4490 for (i=0;i<pcbddc->local_primal_size;i++) { 4491 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4492 } 4493 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4494 } 4495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4496 if (set_error_reduced) { 4497 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4498 } 4499 } 4500 /* get back data */ 4501 *coarse_size_n = coarse_size; 4502 *local_primal_indices_n = local_primal_indices; 4503 PetscFunctionReturn(0); 4504 } 4505 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCGlobalToLocal" 4508 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4509 { 4510 IS localis_t; 4511 PetscInt i,lsize,*idxs,n; 4512 PetscScalar *vals; 4513 PetscErrorCode ierr; 4514 4515 PetscFunctionBegin; 4516 /* get indices in local ordering exploiting local to global map */ 4517 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4518 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4519 for (i=0;i<lsize;i++) vals[i] = 1.0; 4520 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4521 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4522 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4523 if (idxs) { /* multilevel guard */ 4524 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4525 } 4526 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4527 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4528 ierr = PetscFree(vals);CHKERRQ(ierr); 4529 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4530 /* now compute set in local ordering */ 4531 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4532 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4534 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4535 for (i=0,lsize=0;i<n;i++) { 4536 if (PetscRealPart(vals[i]) > 0.5) { 4537 lsize++; 4538 } 4539 } 4540 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4541 for (i=0,lsize=0;i<n;i++) { 4542 if (PetscRealPart(vals[i]) > 0.5) { 4543 idxs[lsize++] = i; 4544 } 4545 } 4546 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4547 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4548 *localis = localis_t; 4549 PetscFunctionReturn(0); 4550 } 4551 4552 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4553 #undef __FUNCT__ 4554 #define __FUNCT__ "PCBDDCMatMult_Private" 4555 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4556 { 4557 PCBDDCChange_ctx change_ctx; 4558 PetscErrorCode ierr; 4559 4560 PetscFunctionBegin; 4561 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4562 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4563 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4564 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4570 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4571 { 4572 PCBDDCChange_ctx change_ctx; 4573 PetscErrorCode ierr; 4574 4575 PetscFunctionBegin; 4576 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4577 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4578 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4579 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 #undef __FUNCT__ 4584 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4585 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4586 { 4587 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4588 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4589 PetscInt *used_xadj,*used_adjncy; 4590 PetscBool free_used_adj; 4591 PetscErrorCode ierr; 4592 4593 PetscFunctionBegin; 4594 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4595 free_used_adj = PETSC_FALSE; 4596 if (pcbddc->sub_schurs_layers == -1) { 4597 used_xadj = NULL; 4598 used_adjncy = NULL; 4599 } else { 4600 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4601 used_xadj = pcbddc->mat_graph->xadj; 4602 used_adjncy = pcbddc->mat_graph->adjncy; 4603 } else if (pcbddc->computed_rowadj) { 4604 used_xadj = pcbddc->mat_graph->xadj; 4605 used_adjncy = pcbddc->mat_graph->adjncy; 4606 } else { 4607 PetscBool flg_row=PETSC_FALSE; 4608 const PetscInt *xadj,*adjncy; 4609 PetscInt nvtxs; 4610 4611 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4612 if (flg_row) { 4613 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4614 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4615 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4616 free_used_adj = PETSC_TRUE; 4617 } else { 4618 pcbddc->sub_schurs_layers = -1; 4619 used_xadj = NULL; 4620 used_adjncy = NULL; 4621 } 4622 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4623 } 4624 } 4625 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4626 4627 /* free adjacency */ 4628 if (free_used_adj) { 4629 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4630 } 4631 PetscFunctionReturn(0); 4632 } 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "PCBDDCInitSubSchurs" 4636 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4637 { 4638 PC_IS *pcis=(PC_IS*)pc->data; 4639 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4640 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4641 PCBDDCGraph graph; 4642 Mat S_j; 4643 PetscErrorCode ierr; 4644 4645 PetscFunctionBegin; 4646 /* attach interface graph for determining subsets */ 4647 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4648 IS verticesIS; 4649 4650 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4651 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4655 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4656 /* 4657 if (pcbddc->dbg_flag) { 4658 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 } 4660 */ 4661 } else { 4662 graph = pcbddc->mat_graph; 4663 } 4664 4665 /* Create Schur complement matrix */ 4666 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4667 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4668 4669 /* sub_schurs init */ 4670 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4671 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4672 /* free graph struct */ 4673 if (pcbddc->sub_schurs_rebuild) { 4674 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4675 } 4676 PetscFunctionReturn(0); 4677 } 4678