1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 cum += subset_size*n; 66 cum2 += n; 67 maxneigs = PetscMax(maxneigs,n); 68 } 69 } 70 71 if (mss) { 72 if (symmetric) { 73 PetscBLASInt B_itype = 1; 74 PetscBLASInt B_N = mss; 75 PetscScalar zero = 0.0; 76 PetscScalar eps = 0.0; /* dlamch? */ 77 78 B_lwork = -1; 79 S = NULL; 80 St = NULL; 81 eigs = NULL; 82 eigv = NULL; 83 B_iwork = NULL; 84 B_ifail = NULL; 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 #if defined(PETSC_USE_COMPLEX) 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 88 #else 89 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 90 #endif 91 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 } else { 94 /* TODO */ 95 } 96 } else { 97 lwork = 0; 98 } 99 100 nv = 0; 101 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 102 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 103 } 104 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 105 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 106 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 107 #if defined(PETSC_USE_COMPLEX) 108 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 109 #endif 110 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 111 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 112 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 113 nv+cum,&pcbddc->adaptive_constraints_idxs, 114 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 115 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 116 117 maxneigs = 0; 118 cum = cum2 = cumarray = 0; 119 if (sub_schurs->is_Ej_com) { 120 const PetscInt *idxs; 121 122 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 123 for (cum=0;cum<nv;cum++) { 124 pcbddc->adaptive_constraints_n[cum] = 1; 125 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 126 pcbddc->adaptive_constraints_ptrs[cum] = cum; 127 pcbddc->adaptive_constraints_data[cum] = 1.0; 128 } 129 cum2 = cum; 130 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 131 } 132 133 if (mss) { /* multilevel */ 134 if (pcbddc->use_deluxe_scaling) { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 136 } else { 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 138 } 139 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 140 } 141 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt j,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 147 const PetscInt *idxs; 148 PetscScalar one = 1.0,zero=0.0; 149 PetscBLASInt B_N; 150 151 /* S should be copied since we need it for deluxe scaling */ 152 if (symmetric) { 153 PetscInt j; 154 for (j=0;j<subset_size;j++) { 155 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 156 } 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 } else { 161 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 } 164 /* is always this the right matrix? */ 165 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 167 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 168 /* St = Starray+cumarray; */ 169 170 if (symmetric) { 171 PetscBLASInt B_itype = 1; 172 PetscBLASInt B_IL = 1, B_IU; 173 PetscScalar eps = -1.0; /* dlamch? */ 174 PetscInt nmin_s; 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 nmin_s = PetscMin(nmin,B_N); 202 if (B_neigs < nmin_s) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 251 252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 253 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 #if 0 256 PetscInt ii; 257 for (ii=0;ii<B_neigs;ii++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 259 for (j=0;j<B_N;j++) { 260 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 261 } 262 } 263 #endif 264 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 265 for (j=0;j<B_neigs;j++) { 266 #if 0 267 { 268 PetscBLASInt Blas_N,Blas_one = 1.0; 269 PetscScalar norm; 270 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 272 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 273 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } else { 275 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 276 } 277 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 278 } 279 #endif 280 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 281 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 282 cum2 += subset_size; 283 } 284 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 } 286 /* shift for next computation */ 287 cumarray += subset_size*subset_size; 288 } 289 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 290 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 291 292 if (mss) { 293 if (pcbddc->use_deluxe_scaling) { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 295 } else { 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 297 } 298 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 299 } 300 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 301 #if defined(PETSC_USE_COMPLEX) 302 ierr = PetscFree(rwork);CHKERRQ(ierr); 303 #endif 304 if (pcbddc->dbg_flag) { 305 PetscInt maxneigs_r; 306 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 307 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "PCBDDCSetUpSolvers" 314 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 315 { 316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 317 PetscScalar *coarse_submat_vals; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 /* Setup local scatters R_to_B and (optionally) R_to_D */ 322 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 323 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 324 325 /* Setup local neumann solver ksp_R */ 326 /* PCBDDCSetUpLocalScatters should be called first! */ 327 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 328 329 /* Change global null space passed in by the user if change of basis has been requested */ 330 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 331 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 332 } 333 334 /* 335 Setup local correction and local part of coarse basis. 336 Gives back the dense local part of the coarse matrix in column major ordering 337 */ 338 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 339 340 /* Compute total number of coarse nodes and setup coarse solver */ 341 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 342 343 /* free */ 344 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "PCBDDCResetCustomization" 350 PetscErrorCode PCBDDCResetCustomization(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 358 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 361 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 362 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 363 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 364 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetTopography" 372 PetscErrorCode PCBDDCResetTopography(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 380 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 381 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCBDDCResetSolvers" 388 PetscErrorCode PCBDDCResetSolvers(PC pc) 389 { 390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 391 PetscScalar *array; 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 396 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 652 /* Get submatrices from subdomain matrix */ 653 if (n_vertices) { 654 Mat newmat; 655 IS is_aux; 656 PetscInt ibs,mbs; 657 PetscBool issbaij; 658 659 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 660 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 661 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 662 if (ibs != mbs) { /* need to convert to SEQAIJ */ 663 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 666 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 667 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 668 } else { 669 /* this is safe */ 670 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 671 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 672 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 673 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 674 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 675 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 676 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 740 PetscScalar *x,*y; 741 PetscBLASInt B_N,B_one = 1; 742 743 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 745 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 746 if (F) { 747 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 748 } else { 749 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 750 for (i=0;i<n_vertices;i++) { 751 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 752 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 753 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 754 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 756 } 757 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 758 } 759 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 760 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 761 if (n_constraints) { 762 Mat B; 763 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 765 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 766 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 767 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 768 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 769 ierr = MatDestroy(&B);CHKERRQ(ierr); 770 } 771 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 772 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 773 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 774 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 775 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 777 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 778 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 779 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 780 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 782 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 783 /* coarse basis functions */ 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 786 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 787 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 788 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 y[n_B*i+idx_V_B[i]] = 1.0; 791 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 792 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 793 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 796 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 797 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 800 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 801 } 802 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 803 } 804 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 805 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 806 } 807 808 if (n_constraints) { 809 Mat B; 810 811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 813 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 814 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 815 if (n_vertices) { 816 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&B);CHKERRQ(ierr); 819 /* coarse basis functions */ 820 for (i=0;i<n_constraints;i++) { 821 PetscScalar *y; 822 823 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 825 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 826 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 831 if (pcbddc->switch_static || pcbddc->dbg_flag) { 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 837 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 838 } 839 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 840 } 841 } 842 843 /* compute other basis functions for non-symmetric problems */ 844 if (!pcbddc->issym) { 845 Mat B,X; 846 847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 848 849 if (n_constraints) { 850 Mat S_VCT,S_CCT,B_C; 851 852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 853 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 854 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 855 if (n_vertices) { 856 Mat B_V; 857 858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 859 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 860 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 861 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 862 } 863 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 864 } 865 if (n_vertices) { 866 Mat A_VRT; 867 PetscBLASInt B_N,B_one = 1; 868 869 if (!n_constraints) { /* if there are no constraints, reset work */ 870 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 871 } 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 873 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 874 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 875 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 876 } 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 879 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 880 for (i=0;i<pcbddc->local_primal_size;i++) { 881 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 882 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 883 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 886 } 887 } else { 888 for (i=0;i<pcbddc->local_primal_size;i++) { 889 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 890 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 891 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 893 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<pcbddc->local_primal_size;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 if (i<n_vertices) { 907 y[n_B*i+idx_V_B[i]] = 1.0; 908 } 909 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 912 if (pcbddc->switch_static || pcbddc->dbg_flag) { 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 918 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 919 } 920 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 921 } 922 ierr = MatDestroy(&X);CHKERRQ(ierr); 923 } 924 925 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 927 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 928 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 929 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 930 /* Checking coarse_sub_mat and coarse basis functios */ 931 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 932 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 933 if (pcbddc->dbg_flag) { 934 Mat coarse_sub_mat; 935 Mat AUXMAT,TM1,TM2,TM3,TM4; 936 Mat coarse_phi_D,coarse_phi_B; 937 Mat coarse_psi_D,coarse_psi_B; 938 Mat A_II,A_BB,A_IB,A_BI; 939 MatType checkmattype=MATSEQAIJ; 940 PetscReal real_value; 941 942 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 943 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 944 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 945 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 946 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 947 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 948 if (unsymmetric_check) { 949 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 950 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 951 } 952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 953 954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 if (unsymmetric_check) { 958 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 959 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 960 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 961 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 962 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 963 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 964 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 965 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 966 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 967 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 968 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 969 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 970 } else { 971 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 972 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 973 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 974 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 975 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 976 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 977 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 978 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 979 } 980 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 981 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 982 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 983 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 984 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 985 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 986 987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 990 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 991 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 992 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 993 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 994 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 995 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 996 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 997 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 998 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 999 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1000 if (unsymmetric_check) { 1001 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1002 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1003 } 1004 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1005 } 1006 1007 /* free memory */ 1008 ierr = PetscFree(work);CHKERRQ(ierr); 1009 if (n_vertices) { 1010 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1011 } 1012 if (n_constraints) { 1013 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1014 } 1015 /* get back data */ 1016 *coarse_submat_vals_n = coarse_submat_vals; 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1022 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1023 { 1024 Mat *work_mat; 1025 IS isrow_s,iscol_s; 1026 PetscBool rsorted,csorted; 1027 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1028 PetscErrorCode ierr; 1029 1030 PetscFunctionBegin; 1031 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1032 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1033 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1034 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1035 1036 if (!rsorted) { 1037 const PetscInt *idxs; 1038 PetscInt *idxs_sorted,i; 1039 1040 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1041 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1042 for (i=0;i<rsize;i++) { 1043 idxs_perm_r[i] = i; 1044 } 1045 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1046 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1047 for (i=0;i<rsize;i++) { 1048 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1049 } 1050 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1051 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1052 } else { 1053 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1054 isrow_s = isrow; 1055 } 1056 1057 if (!csorted) { 1058 if (isrow == iscol) { 1059 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1060 iscol_s = isrow_s; 1061 } else { 1062 const PetscInt *idxs; 1063 PetscInt *idxs_sorted,i; 1064 1065 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1066 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1067 for (i=0;i<csize;i++) { 1068 idxs_perm_c[i] = i; 1069 } 1070 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1071 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1072 for (i=0;i<csize;i++) { 1073 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1074 } 1075 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1076 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1077 } 1078 } else { 1079 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1080 iscol_s = iscol; 1081 } 1082 1083 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1084 1085 if (!rsorted || !csorted) { 1086 Mat new_mat; 1087 IS is_perm_r,is_perm_c; 1088 1089 if (!rsorted) { 1090 PetscInt *idxs_r,i; 1091 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1092 for (i=0;i<rsize;i++) { 1093 idxs_r[idxs_perm_r[i]] = i; 1094 } 1095 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1096 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1097 } else { 1098 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1099 } 1100 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1101 1102 if (!csorted) { 1103 if (isrow_s == iscol_s) { 1104 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1105 is_perm_c = is_perm_r; 1106 } else { 1107 PetscInt *idxs_c,i; 1108 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1109 for (i=0;i<csize;i++) { 1110 idxs_c[idxs_perm_c[i]] = i; 1111 } 1112 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1113 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1114 } 1115 } else { 1116 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1117 } 1118 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1119 1120 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1121 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1122 work_mat[0] = new_mat; 1123 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1124 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1125 } 1126 1127 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1128 *B = work_mat[0]; 1129 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1130 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1131 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1137 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1138 { 1139 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1140 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1141 Mat new_mat; 1142 IS is_local,is_global; 1143 PetscInt local_size; 1144 PetscBool isseqaij; 1145 PetscErrorCode ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1149 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1150 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1151 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1152 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1153 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1154 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1155 1156 /* check */ 1157 if (pcbddc->dbg_flag) { 1158 Vec x,x_change; 1159 PetscReal error; 1160 1161 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1162 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1163 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1164 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1167 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1170 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1173 ierr = VecDestroy(&x);CHKERRQ(ierr); 1174 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1175 } 1176 1177 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1178 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1179 if (isseqaij) { 1180 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1181 } else { 1182 Mat work_mat; 1183 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1184 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1185 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1186 } 1187 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1188 /* 1189 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1190 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1191 */ 1192 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1198 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1199 { 1200 PC_IS* pcis = (PC_IS*)(pc->data); 1201 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1202 IS is_aux1,is_aux2; 1203 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1204 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1205 PetscInt vbs,bs; 1206 PetscBT bitmask; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 /* 1211 No need to setup local scatters if 1212 - primal space is unchanged 1213 AND 1214 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1215 AND 1216 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1217 */ 1218 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1219 PetscFunctionReturn(0); 1220 } 1221 /* destroy old objects */ 1222 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1223 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1224 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1225 /* Set Non-overlapping dimensions */ 1226 n_B = pcis->n_B; n_D = pcis->n - n_B; 1227 n_vertices = pcbddc->n_actual_vertices; 1228 /* create auxiliary bitmask */ 1229 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1230 for (i=0;i<n_vertices;i++) { 1231 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1232 } 1233 1234 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1235 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1236 for (i=0, n_R=0; i<pcis->n; i++) { 1237 if (!PetscBTLookup(bitmask,i)) { 1238 idx_R_local[n_R] = i; 1239 n_R++; 1240 } 1241 } 1242 1243 /* Block code */ 1244 vbs = 1; 1245 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1246 if (bs>1 && !(n_vertices%bs)) { 1247 PetscBool is_blocked = PETSC_TRUE; 1248 PetscInt *vary; 1249 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1250 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1251 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1252 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1253 for (i=0; i<n_vertices; i++) { 1254 if (vary[i]!=0 && vary[i]!=bs) { 1255 is_blocked = PETSC_FALSE; 1256 break; 1257 } 1258 } 1259 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1260 vbs = bs; 1261 for (i=0;i<n_R/vbs;i++) { 1262 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1263 } 1264 } 1265 ierr = PetscFree(vary);CHKERRQ(ierr); 1266 } 1267 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1268 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1269 1270 /* print some info if requested */ 1271 if (pcbddc->dbg_flag) { 1272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1274 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1280 } 1281 1282 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1283 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1284 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1285 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1286 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1287 for (i=0; i<n_D; i++) { 1288 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1289 } 1290 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1291 for (i=0, j=0; i<n_R; i++) { 1292 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1293 aux_array1[j++] = i; 1294 } 1295 } 1296 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0, j=0; i<n_B; i++) { 1299 if (!PetscBTLookup(bitmask,is_indices[i])) { 1300 aux_array2[j++] = i; 1301 } 1302 } 1303 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1304 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1305 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1306 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1307 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1308 1309 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1310 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1311 for (i=0, j=0; i<n_R; i++) { 1312 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1313 aux_array1[j++] = i; 1314 } 1315 } 1316 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1317 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1319 } 1320 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1321 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1328 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1329 { 1330 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1331 PC_IS *pcis = (PC_IS*)pc->data; 1332 PC pc_temp; 1333 Mat A_RR; 1334 MatReuse reuse; 1335 PetscScalar m_one = -1.0; 1336 PetscReal value; 1337 PetscInt n_D,n_R,ibs,mbs; 1338 PetscBool use_exact,use_exact_reduced,issbaij; 1339 PetscErrorCode ierr; 1340 /* prefixes stuff */ 1341 char dir_prefix[256],neu_prefix[256],str_level[16]; 1342 size_t len; 1343 1344 PetscFunctionBegin; 1345 1346 /* compute prefixes */ 1347 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1348 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1349 if (!pcbddc->current_level) { 1350 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1351 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1352 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1353 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1356 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1357 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1358 len -= 15; /* remove "pc_bddc_coarse_" */ 1359 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1360 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1361 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1362 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1366 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1367 } 1368 1369 /* DIRICHLET PROBLEM */ 1370 if (dirichlet) { 1371 /* Matrix for Dirichlet problem is pcis->A_II */ 1372 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1373 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1374 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1375 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1376 /* default */ 1377 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1378 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1379 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1380 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1381 if (issbaij) { 1382 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1383 } else { 1384 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1385 } 1386 /* Allow user's customization */ 1387 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1388 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1389 } 1390 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1391 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1392 if (!n_D) { 1393 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1394 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1395 } 1396 /* Set Up KSP for Dirichlet problem of BDDC */ 1397 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1398 /* set ksp_D into pcis data */ 1399 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1400 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1401 pcis->ksp_D = pcbddc->ksp_D; 1402 } 1403 1404 /* NEUMANN PROBLEM */ 1405 A_RR = 0; 1406 if (neumann) { 1407 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1408 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1409 if (pcbddc->ksp_R) { /* already created ksp */ 1410 PetscInt nn_R; 1411 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1412 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1413 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1414 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1415 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1416 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1417 reuse = MAT_INITIAL_MATRIX; 1418 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1419 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1420 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1421 reuse = MAT_INITIAL_MATRIX; 1422 } else { /* safe to reuse the matrix */ 1423 reuse = MAT_REUSE_MATRIX; 1424 } 1425 } 1426 /* last check */ 1427 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1428 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1429 reuse = MAT_INITIAL_MATRIX; 1430 } 1431 } else { /* first time, so we need to create the matrix */ 1432 reuse = MAT_INITIAL_MATRIX; 1433 } 1434 /* extract A_RR */ 1435 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1436 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1437 if (ibs != mbs) { 1438 Mat newmat; 1439 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1440 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1441 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1442 } else { 1443 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1444 } 1445 if (!pcbddc->ksp_R) { /* create object if not present */ 1446 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1447 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1448 /* default */ 1449 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1450 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1451 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1453 if (issbaij) { 1454 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1455 } else { 1456 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1457 } 1458 /* Allow user's customization */ 1459 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1460 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1461 } 1462 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1463 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1464 if (!n_R) { 1465 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1466 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1467 } 1468 /* Set Up KSP for Neumann problem of BDDC */ 1469 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1470 } 1471 1472 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1473 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1474 if (pcbddc->dbg_flag) { 1475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1478 } 1479 if (dirichlet) { /* Dirichlet */ 1480 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1481 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1482 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1483 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1484 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1485 /* need to be adapted? */ 1486 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1487 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1488 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1489 /* print info */ 1490 if (pcbddc->dbg_flag) { 1491 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1493 } 1494 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1495 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1496 } 1497 } 1498 if (neumann) { /* Neumann */ 1499 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1500 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1502 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1503 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1504 /* need to be adapted? */ 1505 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1506 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1507 /* print info */ 1508 if (pcbddc->dbg_flag) { 1509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1511 } 1512 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1513 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1514 } 1515 } 1516 } 1517 /* free Neumann problem's matrix */ 1518 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1524 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1525 { 1526 PetscErrorCode ierr; 1527 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1528 1529 PetscFunctionBegin; 1530 if (applytranspose) { 1531 if (pcbddc->local_auxmat1) { 1532 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1533 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1534 } 1535 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1536 } else { 1537 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1538 if (pcbddc->local_auxmat1) { 1539 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1540 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1549 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1550 { 1551 PetscErrorCode ierr; 1552 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1553 PC_IS* pcis = (PC_IS*) (pc->data); 1554 const PetscScalar zero = 0.0; 1555 1556 PetscFunctionBegin; 1557 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1558 if (applytranspose) { 1559 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1560 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1561 } else { 1562 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1563 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1564 } 1565 /* start communications from local primal nodes to rhs of coarse solver */ 1566 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1567 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1571 /* TODO remove null space when doing multilevel */ 1572 if (pcbddc->coarse_ksp) { 1573 if (applytranspose) { 1574 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1575 } else { 1576 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1577 } 1578 } 1579 1580 /* Local solution on R nodes */ 1581 if (pcis->n) { 1582 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1583 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1585 if (pcbddc->switch_static) { 1586 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1588 } 1589 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1590 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 if (pcbddc->switch_static) { 1594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 } 1597 } 1598 1599 /* communications from coarse sol to local primal nodes */ 1600 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1601 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1602 1603 /* Sum contributions from two levels */ 1604 if (applytranspose) { 1605 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1606 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1607 } else { 1608 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1609 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1617 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1618 { 1619 PetscErrorCode ierr; 1620 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1621 PetscScalar *array,*array2; 1622 Vec from,to; 1623 1624 PetscFunctionBegin; 1625 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1626 from = pcbddc->coarse_vec; 1627 to = pcbddc->vec1_P; 1628 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1629 Vec tvec; 1630 PetscInt lsize; 1631 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1632 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1633 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1634 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1635 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1636 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1637 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1638 } 1639 } else { /* from local to global -> put data in coarse right hand side */ 1640 from = pcbddc->vec1_P; 1641 to = pcbddc->coarse_vec; 1642 } 1643 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1649 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1650 { 1651 PetscErrorCode ierr; 1652 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1653 PetscScalar *array,*array2; 1654 Vec from,to; 1655 1656 PetscFunctionBegin; 1657 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1658 from = pcbddc->coarse_vec; 1659 to = pcbddc->vec1_P; 1660 } else { /* from local to global -> put data in coarse right hand side */ 1661 from = pcbddc->vec1_P; 1662 to = pcbddc->coarse_vec; 1663 } 1664 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1665 if (smode == SCATTER_FORWARD) { 1666 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1667 Vec tvec; 1668 PetscInt lsize; 1669 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1670 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1671 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1672 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1673 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1674 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1675 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1676 } 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 /* uncomment for testing purposes */ 1682 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1685 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1686 { 1687 PetscErrorCode ierr; 1688 PC_IS* pcis = (PC_IS*)(pc->data); 1689 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1690 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1691 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1692 MatType impMatType=MATSEQAIJ; 1693 /* one and zero */ 1694 PetscScalar one=1.0,zero=0.0; 1695 /* space to store constraints and their local indices */ 1696 PetscScalar *temp_quadrature_constraint; 1697 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1698 /* iterators */ 1699 PetscInt i,j,k,total_counts,temp_start_ptr; 1700 /* stuff to store connected components stored in pcbddc->mat_graph */ 1701 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1702 PetscInt n_ISForFaces,n_ISForEdges; 1703 /* near null space stuff */ 1704 MatNullSpace nearnullsp; 1705 const Vec *nearnullvecs; 1706 Vec *localnearnullsp; 1707 PetscBool nnsp_has_cnst; 1708 PetscInt nnsp_size; 1709 PetscScalar *array; 1710 /* BLAS integers */ 1711 PetscBLASInt lwork,lierr; 1712 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1713 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1714 /* LAPACK working arrays for SVD or POD */ 1715 PetscBool skip_lapack; 1716 PetscScalar *work; 1717 PetscReal *singular_vals; 1718 #if defined(PETSC_USE_COMPLEX) 1719 PetscReal *rwork; 1720 #endif 1721 #if defined(PETSC_MISSING_LAPACK_GESVD) 1722 PetscBLASInt Blas_one_2=1; 1723 PetscScalar *temp_basis,*correlation_mat; 1724 #else 1725 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1726 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1727 #endif 1728 /* reuse */ 1729 PetscInt olocal_primal_size; 1730 PetscInt *oprimal_indices_local_idxs; 1731 /* change of basis */ 1732 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1733 PetscBool boolforchange,qr_needed; 1734 PetscBT touched,change_basis,qr_needed_idx; 1735 /* auxiliary stuff */ 1736 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1737 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1738 PetscScalar *temp_quadrature_constraint_work=NULL; 1739 /* some quantities */ 1740 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1741 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1742 1743 1744 PetscFunctionBegin; 1745 /* Destroy Mat objects computed previously */ 1746 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1747 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1748 /* TODO synch with adaptive selection */ 1749 /* Get index sets for faces, edges and vertices from graph */ 1750 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1751 /* free unneeded index sets */ 1752 if (!pcbddc->use_vertices) { 1753 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1754 } 1755 if (!pcbddc->use_edges) { 1756 for (i=0;i<n_ISForEdges;i++) { 1757 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1758 } 1759 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1760 n_ISForEdges = 0; 1761 } 1762 if (!pcbddc->use_faces) { 1763 for (i=0;i<n_ISForFaces;i++) { 1764 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1767 n_ISForFaces = 0; 1768 } 1769 /* HACKS (the following two blocks of code) */ 1770 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1771 pcbddc->use_change_of_basis = PETSC_TRUE; 1772 if (!ISForEdges) { 1773 pcbddc->use_change_on_faces = PETSC_TRUE; 1774 } 1775 } 1776 if (pcbddc->NullSpace) { 1777 /* use_change_of_basis should be consistent among processors */ 1778 PetscBool tbool[2],gbool[2]; 1779 tbool [0] = pcbddc->use_change_of_basis; 1780 tbool [1] = pcbddc->use_change_on_faces; 1781 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1782 pcbddc->use_change_of_basis = gbool[0]; 1783 pcbddc->use_change_on_faces = gbool[1]; 1784 } 1785 /* print some info */ 1786 if (pcbddc->dbg_flag) { 1787 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1789 i = 0; 1790 if (ISForVertices) { 1791 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1792 } 1793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1797 } 1798 1799 if (!pcbddc->adaptive_selection) { 1800 /* check if near null space is attached to global mat */ 1801 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1802 if (nearnullsp) { 1803 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1804 /* remove any stored info */ 1805 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1806 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1807 /* store information for BDDC solver reuse */ 1808 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1809 pcbddc->onearnullspace = nearnullsp; 1810 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1811 for (i=0;i<nnsp_size;i++) { 1812 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1813 } 1814 } else { /* if near null space is not provided BDDC uses constants by default */ 1815 nnsp_size = 0; 1816 nnsp_has_cnst = PETSC_TRUE; 1817 } 1818 /* get max number of constraints on a single cc */ 1819 max_constraints = nnsp_size; 1820 if (nnsp_has_cnst) max_constraints++; 1821 1822 /* 1823 Evaluate maximum storage size needed by the procedure 1824 - temp_indices will contain start index of each constraint stored as follows 1825 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1826 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1827 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1828 */ 1829 total_counts = n_ISForFaces+n_ISForEdges; 1830 total_counts *= max_constraints; 1831 n_vertices = 0; 1832 if (ISForVertices) { 1833 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1834 } 1835 total_counts += n_vertices; 1836 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1837 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1838 total_counts = 0; 1839 max_size_of_constraint = 0; 1840 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1841 if (i<n_ISForEdges) { 1842 used_IS = &ISForEdges[i]; 1843 } else { 1844 used_IS = &ISForFaces[i-n_ISForEdges]; 1845 } 1846 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1847 total_counts += j; 1848 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1849 } 1850 total_counts *= max_constraints; 1851 total_counts += n_vertices; 1852 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1853 /* get local part of global near null space vectors */ 1854 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1855 for (k=0;k<nnsp_size;k++) { 1856 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 } 1860 1861 /* whether or not to skip lapack calls */ 1862 skip_lapack = PETSC_TRUE; 1863 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1864 1865 /* allocate some auxiliary stuff */ 1866 if (!skip_lapack || pcbddc->use_qr_single) { 1867 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1868 } 1869 1870 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1871 if (!skip_lapack) { 1872 PetscScalar temp_work; 1873 1874 #if defined(PETSC_MISSING_LAPACK_GESVD) 1875 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1876 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1877 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1878 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1879 #if defined(PETSC_USE_COMPLEX) 1880 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1881 #endif 1882 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1883 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1884 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1885 lwork = -1; 1886 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1887 #if !defined(PETSC_USE_COMPLEX) 1888 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1889 #else 1890 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1891 #endif 1892 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1893 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1894 #else /* on missing GESVD */ 1895 /* SVD */ 1896 PetscInt max_n,min_n; 1897 max_n = max_size_of_constraint; 1898 min_n = max_constraints; 1899 if (max_size_of_constraint < max_constraints) { 1900 min_n = max_size_of_constraint; 1901 max_n = max_constraints; 1902 } 1903 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1904 #if defined(PETSC_USE_COMPLEX) 1905 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1906 #endif 1907 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1908 lwork = -1; 1909 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1910 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1911 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1912 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1913 #if !defined(PETSC_USE_COMPLEX) 1914 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1915 #else 1916 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1917 #endif 1918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1919 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1920 #endif /* on missing GESVD */ 1921 /* Allocate optimal workspace */ 1922 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1923 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1924 } 1925 /* Now we can loop on constraining sets */ 1926 total_counts = 0; 1927 temp_indices[0] = 0; 1928 /* vertices */ 1929 if (ISForVertices) { 1930 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1931 if (nnsp_has_cnst) { /* consider all vertices */ 1932 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (i=0;i<n_vertices;i++) { 1934 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1935 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1936 total_counts++; 1937 } 1938 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1939 PetscBool used_vertex; 1940 for (i=0;i<n_vertices;i++) { 1941 used_vertex = PETSC_FALSE; 1942 k = 0; 1943 while (!used_vertex && k<nnsp_size) { 1944 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1945 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1946 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1947 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1948 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1949 total_counts++; 1950 used_vertex = PETSC_TRUE; 1951 } 1952 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1953 k++; 1954 } 1955 } 1956 } 1957 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1958 n_vertices = total_counts; 1959 } 1960 1961 /* edges and faces */ 1962 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1963 if (ncc<n_ISForEdges) { 1964 used_IS = &ISForEdges[ncc]; 1965 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1966 } else { 1967 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1968 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1969 } 1970 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1971 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1972 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1973 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1974 /* change of basis should not be performed on local periodic nodes */ 1975 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1976 if (nnsp_has_cnst) { 1977 PetscScalar quad_value; 1978 temp_constraints++; 1979 if (!pcbddc->use_nnsp_true) { 1980 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1981 } else { 1982 quad_value = 1.0; 1983 } 1984 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1985 for (j=0;j<size_of_constraint;j++) { 1986 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1987 } 1988 /* sort by global ordering if using lapack subroutines (not needed!) */ 1989 if (!skip_lapack || pcbddc->use_qr_single) { 1990 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1991 for (j=0;j<size_of_constraint;j++) { 1992 permutation[j]=j; 1993 } 1994 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1995 for (j=0;j<size_of_constraint;j++) { 1996 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1997 } 1998 for (j=0;j<size_of_constraint;j++) { 1999 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2000 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2001 } 2002 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2003 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2004 } 2005 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2006 total_counts++; 2007 } 2008 for (k=0;k<nnsp_size;k++) { 2009 PetscReal real_value; 2010 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2011 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2012 for (j=0;j<size_of_constraint;j++) { 2013 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2014 } 2015 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2016 /* check if array is null on the connected component */ 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2018 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2019 if (real_value > 0.0) { /* keep indices and values */ 2020 /* sort by global ordering if using lapack subroutines */ 2021 if (!skip_lapack || pcbddc->use_qr_single) { 2022 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2023 for (j=0;j<size_of_constraint;j++) { 2024 permutation[j]=j; 2025 } 2026 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2027 for (j=0;j<size_of_constraint;j++) { 2028 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2029 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2030 } 2031 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2032 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2033 } 2034 temp_constraints++; 2035 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2036 total_counts++; 2037 } 2038 } 2039 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2040 valid_constraints = temp_constraints; 2041 if (!pcbddc->use_nnsp_true && temp_constraints) { 2042 if (temp_constraints == 1) { /* just normalize the constraint */ 2043 PetscScalar norm; 2044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2045 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2046 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2047 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2048 } else { /* perform SVD */ 2049 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2050 2051 #if defined(PETSC_MISSING_LAPACK_GESVD) 2052 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2053 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2054 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2055 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2056 from that computed using LAPACKgesvd 2057 -> This is due to a different computation of eigenvectors in LAPACKheev 2058 -> The quality of the POD-computed basis will be the same */ 2059 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2060 /* Store upper triangular part of correlation matrix */ 2061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2062 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2063 for (j=0;j<temp_constraints;j++) { 2064 for (k=0;k<j+1;k++) { 2065 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2066 } 2067 } 2068 /* compute eigenvalues and eigenvectors of correlation matrix */ 2069 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2070 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2071 #if !defined(PETSC_USE_COMPLEX) 2072 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2073 #else 2074 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2075 #endif 2076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2077 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2078 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2079 j = 0; 2080 while (j < temp_constraints && singular_vals[j] < tol) j++; 2081 total_counts = total_counts-j; 2082 valid_constraints = temp_constraints-j; 2083 /* scale and copy POD basis into used quadrature memory */ 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2090 if (j<temp_constraints) { 2091 PetscInt ii; 2092 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2093 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2094 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2096 for (k=0;k<temp_constraints-j;k++) { 2097 for (ii=0;ii<size_of_constraint;ii++) { 2098 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2099 } 2100 } 2101 } 2102 #else /* on missing GESVD */ 2103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2104 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2107 #if !defined(PETSC_USE_COMPLEX) 2108 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2109 #else 2110 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2111 #endif 2112 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2113 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2114 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2115 k = temp_constraints; 2116 if (k > size_of_constraint) k = size_of_constraint; 2117 j = 0; 2118 while (j < k && singular_vals[k-j-1] < tol) j++; 2119 valid_constraints = k-j; 2120 total_counts = total_counts-temp_constraints+valid_constraints; 2121 #endif /* on missing GESVD */ 2122 } 2123 } 2124 /* setting change_of_basis flag is safe now */ 2125 if (boolforchange) { 2126 for (j=0;j<valid_constraints;j++) { 2127 PetscBTSet(change_basis,total_counts-j-1); 2128 } 2129 } 2130 } 2131 /* free workspace */ 2132 if (!skip_lapack || pcbddc->use_qr_single) { 2133 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2134 } 2135 if (!skip_lapack) { 2136 ierr = PetscFree(work);CHKERRQ(ierr); 2137 #if defined(PETSC_USE_COMPLEX) 2138 ierr = PetscFree(rwork);CHKERRQ(ierr); 2139 #endif 2140 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2141 #if defined(PETSC_MISSING_LAPACK_GESVD) 2142 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2143 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2144 #endif 2145 } 2146 for (k=0;k<nnsp_size;k++) { 2147 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2148 } 2149 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2150 } else { 2151 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2152 PetscInt cum = 0; 2153 2154 total_counts = 0; 2155 n_vertices = 0; 2156 if (sub_schurs->is_Ej_com) { 2157 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2158 } 2159 max_constraints = 0; 2160 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2161 total_counts += pcbddc->adaptive_constraints_n[i]; 2162 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2163 } 2164 temp_indices = pcbddc->adaptive_constraints_ptrs; 2165 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2166 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2167 2168 #if 0 2169 printf("Found %d totals\n",total_counts); 2170 for (i=0;i<total_counts;i++) { 2171 printf("const %d, start %d",i,temp_indices[i]); 2172 printf(" end %d:\n",temp_indices[i+1]); 2173 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2174 printf(" idxs %d",temp_indices_to_constraint[j]); 2175 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2176 } 2177 } 2178 for (i=0;i<n_vertices;i++) { 2179 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2180 } 2181 for (i=0;i<sub_schurs->n_subs;i++) { 2182 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2183 } 2184 #endif 2185 2186 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2187 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2188 /* Change of basis */ 2189 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2190 if (pcbddc->use_change_of_basis) { 2191 cum = n_vertices; 2192 for (i=0;i<sub_schurs->n_subs;i++) { 2193 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2194 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2195 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2196 } 2197 } 2198 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2199 } 2200 } 2201 } 2202 2203 /* free index sets of faces, edges and vertices */ 2204 for (i=0;i<n_ISForFaces;i++) { 2205 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2206 } 2207 if (n_ISForFaces) { 2208 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2209 } 2210 for (i=0;i<n_ISForEdges;i++) { 2211 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2212 } 2213 if (n_ISForEdges) { 2214 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2215 } 2216 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2217 2218 /* map temp_indices_to_constraint in boundary numbering */ 2219 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2220 if (i != temp_indices[total_counts]) { 2221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2222 } 2223 2224 /* set quantities in pcbddc data structure and store previous primal size */ 2225 /* n_vertices defines the number of subdomain corners in the primal space */ 2226 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2227 olocal_primal_size = pcbddc->local_primal_size; 2228 pcbddc->local_primal_size = total_counts; 2229 pcbddc->n_vertices = n_vertices; 2230 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2231 2232 /* Create constraint matrix */ 2233 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2234 /* so we need to set it up properly either with or without change of basis */ 2235 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2236 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2237 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2238 /* array to compute a local numbering of constraints : vertices first then constraints */ 2239 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2240 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2241 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2242 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2243 /* auxiliary stuff for basis change */ 2244 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2245 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2246 2247 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2248 total_primal_vertices=0; 2249 for (i=0;i<pcbddc->local_primal_size;i++) { 2250 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2251 if (size_of_constraint == 1) { 2252 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2253 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2254 aux_primal_minloc[total_primal_vertices]=0; 2255 total_primal_vertices++; 2256 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2257 PetscInt min_loc,min_index; 2258 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2259 /* find first untouched local node */ 2260 k = 0; 2261 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2262 min_index = global_indices[k]; 2263 min_loc = k; 2264 /* search the minimum among global nodes already untouched on the cc */ 2265 for (k=1;k<size_of_constraint;k++) { 2266 /* there can be more than one constraint on a single connected component */ 2267 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2268 min_index = global_indices[k]; 2269 min_loc = k; 2270 } 2271 } 2272 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2273 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2274 aux_primal_minloc[total_primal_vertices]=min_loc; 2275 total_primal_vertices++; 2276 } 2277 } 2278 /* determine if a QR strategy is needed for change of basis */ 2279 qr_needed = PETSC_FALSE; 2280 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2281 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2282 if (PetscBTLookup(change_basis,i)) { 2283 if (!pcbddc->use_qr_single) { 2284 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2285 j = 0; 2286 for (k=0;k<size_of_constraint;k++) { 2287 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2288 j++; 2289 } 2290 } 2291 /* found more than one primal dof on the cc */ 2292 if (j > 1) { 2293 PetscBTSet(qr_needed_idx,i); 2294 qr_needed = PETSC_TRUE; 2295 } 2296 } else { 2297 PetscBTSet(qr_needed_idx,i); 2298 qr_needed = PETSC_TRUE; 2299 } 2300 } 2301 } 2302 /* free workspace */ 2303 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2304 2305 /* permute indices in order to have a sorted set of vertices */ 2306 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2307 2308 /* nonzero structure of constraint matrix */ 2309 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2310 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2311 j=total_primal_vertices; 2312 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2313 if (!PetscBTLookup(change_basis,i)) { 2314 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2315 j++; 2316 } 2317 } 2318 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2319 ierr = PetscFree(nnz);CHKERRQ(ierr); 2320 /* set values in constraint matrix */ 2321 for (i=0;i<total_primal_vertices;i++) { 2322 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2323 } 2324 total_counts = total_primal_vertices; 2325 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2326 if (!PetscBTLookup(change_basis,i)) { 2327 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2328 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2329 total_counts++; 2330 } 2331 } 2332 /* assembling */ 2333 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2335 /* 2336 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2337 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2338 */ 2339 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2340 if (pcbddc->use_change_of_basis) { 2341 /* dual and primal dofs on a single cc */ 2342 PetscInt dual_dofs,primal_dofs; 2343 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2344 PetscInt primal_counter; 2345 /* working stuff for GEQRF */ 2346 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2347 PetscBLASInt lqr_work; 2348 /* working stuff for UNGQR */ 2349 PetscScalar *gqr_work,lgqr_work_t; 2350 PetscBLASInt lgqr_work; 2351 /* working stuff for TRTRS */ 2352 PetscScalar *trs_rhs; 2353 PetscBLASInt Blas_NRHS; 2354 /* pointers for values insertion into change of basis matrix */ 2355 PetscInt *start_rows,*start_cols; 2356 PetscScalar *start_vals; 2357 /* working stuff for values insertion */ 2358 PetscBT is_primal; 2359 /* matrix sizes */ 2360 PetscInt global_size,local_size; 2361 /* temporary change of basis */ 2362 Mat localChangeOfBasisMatrix; 2363 2364 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2365 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2366 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2367 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2368 2369 /* nonzeros for local mat */ 2370 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2371 for (i=0;i<pcis->n;i++) nnz[i]=1; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (PetscBTLookup(change_basis,i)) { 2374 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2375 if (PetscBTLookup(qr_needed_idx,i)) { 2376 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2377 } else { 2378 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2379 /* get local primal index on the cc */ 2380 j = 0; 2381 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2382 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2383 } 2384 } 2385 } 2386 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2387 ierr = PetscFree(nnz);CHKERRQ(ierr); 2388 /* Set initial identity in the matrix */ 2389 for (i=0;i<pcis->n;i++) { 2390 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2391 } 2392 2393 if (pcbddc->dbg_flag) { 2394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2396 } 2397 2398 2399 /* Now we loop on the constraints which need a change of basis */ 2400 /* 2401 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2402 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2403 2404 Basic blocks of change of basis matrix T computed by 2405 2406 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2407 2408 | 1 0 ... 0 s_1/S | 2409 | 0 1 ... 0 s_2/S | 2410 | ... | 2411 | 0 ... 1 s_{n-1}/S | 2412 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2413 2414 with S = \sum_{i=1}^n s_i^2 2415 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2416 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2417 2418 - QR decomposition of constraints otherwise 2419 */ 2420 if (qr_needed) { 2421 /* space to store Q */ 2422 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2423 /* first we issue queries for optimal work */ 2424 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2425 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2426 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2427 lqr_work = -1; 2428 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2430 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2431 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2432 lgqr_work = -1; 2433 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2434 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2435 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2436 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2437 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2438 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2439 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2440 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2441 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2442 /* array to store scaling factors for reflectors */ 2443 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2444 /* array to store rhs and solution of triangular solver */ 2445 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2446 /* allocating workspace for check */ 2447 if (pcbddc->dbg_flag) { 2448 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2449 } 2450 } 2451 /* array to store whether a node is primal or not */ 2452 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2453 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2454 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2455 if (i != total_primal_vertices) { 2456 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2457 } 2458 for (i=0;i<total_primal_vertices;i++) { 2459 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2460 } 2461 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2462 2463 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2464 /* -> using implicit ordering contained in temp_indices data */ 2465 total_counts = pcbddc->n_vertices; 2466 primal_counter = total_counts; 2467 while (total_counts<pcbddc->local_primal_size) { 2468 primal_dofs = 1; 2469 if (PetscBTLookup(change_basis,total_counts)) { 2470 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2471 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2472 primal_dofs++; 2473 } 2474 /* get constraint info */ 2475 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2476 dual_dofs = size_of_constraint-primal_dofs; 2477 2478 if (pcbddc->dbg_flag) { 2479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2480 } 2481 2482 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2483 2484 /* copy quadrature constraints for change of basis check */ 2485 if (pcbddc->dbg_flag) { 2486 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2487 } 2488 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2489 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2490 2491 /* compute QR decomposition of constraints */ 2492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2493 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2496 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2498 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2499 2500 /* explictly compute R^-T */ 2501 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2502 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2503 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2504 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2506 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2508 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2511 2512 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2518 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2519 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2520 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2521 2522 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2523 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2524 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2529 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2532 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2534 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2535 2536 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2537 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2538 /* insert cols for primal dofs */ 2539 for (j=0;j<primal_dofs;j++) { 2540 start_vals = &qr_basis[j*size_of_constraint]; 2541 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2542 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2543 } 2544 /* insert cols for dual dofs */ 2545 for (j=0,k=0;j<dual_dofs;k++) { 2546 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2547 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2548 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2549 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2550 j++; 2551 } 2552 } 2553 2554 /* check change of basis */ 2555 if (pcbddc->dbg_flag) { 2556 PetscInt ii,jj; 2557 PetscBool valid_qr=PETSC_TRUE; 2558 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2564 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2565 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2566 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2567 for (jj=0;jj<size_of_constraint;jj++) { 2568 for (ii=0;ii<primal_dofs;ii++) { 2569 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2570 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2571 } 2572 } 2573 if (!valid_qr) { 2574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2575 for (jj=0;jj<size_of_constraint;jj++) { 2576 for (ii=0;ii<primal_dofs;ii++) { 2577 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2578 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2579 } 2580 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2581 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2582 } 2583 } 2584 } 2585 } else { 2586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2587 } 2588 } 2589 } else { /* simple transformation block */ 2590 PetscInt row,col; 2591 PetscScalar val,norm; 2592 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2594 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2595 for (j=0;j<size_of_constraint;j++) { 2596 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2597 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2598 if (!PetscBTLookup(is_primal,row_B)) { 2599 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2600 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2601 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2602 } else { 2603 for (k=0;k<size_of_constraint;k++) { 2604 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2605 if (row != col) { 2606 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2607 } else { 2608 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2609 } 2610 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2611 } 2612 } 2613 } 2614 if (pcbddc->dbg_flag) { 2615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2616 } 2617 } 2618 /* increment primal counter */ 2619 primal_counter += primal_dofs; 2620 } else { 2621 if (pcbddc->dbg_flag) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2623 } 2624 } 2625 /* increment constraint counter total_counts */ 2626 total_counts += primal_dofs; 2627 } 2628 2629 /* free workspace */ 2630 if (qr_needed) { 2631 if (pcbddc->dbg_flag) { 2632 ierr = PetscFree(work);CHKERRQ(ierr); 2633 } 2634 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2635 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2636 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2637 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2638 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2639 } 2640 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2641 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2642 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2643 2644 /* assembling of global change of variable */ 2645 { 2646 Mat tmat; 2647 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2648 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2649 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2650 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2651 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2652 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2653 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2654 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2655 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2656 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2657 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2658 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2659 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2660 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2661 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2662 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2663 } 2664 /* check */ 2665 if (pcbddc->dbg_flag) { 2666 PetscReal error; 2667 Vec x,x_change; 2668 2669 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2670 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2671 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2672 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2673 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2674 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2675 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2676 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2677 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2678 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2679 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2680 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2683 ierr = VecDestroy(&x);CHKERRQ(ierr); 2684 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2685 } 2686 2687 /* adapt sub_schurs computed (if any) */ 2688 if (pcbddc->use_deluxe_scaling) { 2689 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2690 if (sub_schurs->n_subs_par_g) { 2691 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2692 } 2693 if (sub_schurs->S_Ej_all) { 2694 Mat S_1,S_2,tmat; 2695 IS is_all_N; 2696 2697 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2698 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2699 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2700 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2701 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2702 sub_schurs->S_Ej_all = S_1; 2703 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2704 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2705 sub_schurs->sum_S_Ej_all = S_2; 2706 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2707 } 2708 } 2709 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2710 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2711 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2712 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2713 } 2714 2715 /* set up change of basis context */ 2716 if (pcbddc->ChangeOfBasisMatrix) { 2717 PCBDDCChange_ctx change_ctx; 2718 2719 if (!pcbddc->new_global_mat) { 2720 PetscInt global_size,local_size; 2721 2722 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2723 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2724 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2725 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2726 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2727 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2728 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2729 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2730 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2731 } else { 2732 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2733 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2734 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2735 } 2736 if (!pcbddc->user_ChangeOfBasisMatrix) { 2737 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2738 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2739 } else { 2740 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2741 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2742 } 2743 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2744 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2745 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 } 2748 2749 /* get indices in local ordering for vertices and constraints */ 2750 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2751 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2753 } 2754 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2755 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2756 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2757 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2760 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2761 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2762 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2763 /* set quantities in PCBDDC data struct */ 2764 pcbddc->n_actual_vertices = i; 2765 /* check if a new primal space has been introduced */ 2766 pcbddc->new_primal_space_local = PETSC_TRUE; 2767 if (olocal_primal_size == pcbddc->local_primal_size) { 2768 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2769 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2770 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2771 } 2772 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2773 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2774 2775 /* flush dbg viewer */ 2776 if (pcbddc->dbg_flag) { 2777 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2778 } 2779 2780 /* free workspace */ 2781 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2782 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2783 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2784 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2785 if (!pcbddc->adaptive_selection) { 2786 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2787 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2788 } else { 2789 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2790 pcbddc->adaptive_constraints_ptrs, 2791 pcbddc->adaptive_constraints_idxs, 2792 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2793 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2794 } 2795 PetscFunctionReturn(0); 2796 } 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2800 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2801 { 2802 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2803 PC_IS *pcis = (PC_IS*)pc->data; 2804 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2805 PetscInt ierr,i,vertex_size; 2806 PetscViewer viewer=pcbddc->dbg_viewer; 2807 2808 PetscFunctionBegin; 2809 /* Reset previously computed graph */ 2810 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2811 /* Init local Graph struct */ 2812 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2813 2814 /* Check validity of the csr graph passed in by the user */ 2815 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2816 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2817 } 2818 2819 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2820 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2821 Mat mat_adj; 2822 PetscInt *xadj,*adjncy; 2823 PetscInt nvtxs; 2824 PetscBool flg_row=PETSC_TRUE; 2825 2826 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2827 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2828 if (!flg_row) { 2829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2830 } 2831 if (pcbddc->use_local_adj) { 2832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2833 pcbddc->computed_rowadj = PETSC_TRUE; 2834 } else { /* just compute subdomain's connected components */ 2835 IS is_dummy; 2836 ISLocalToGlobalMapping l2gmap_dummy; 2837 PetscInt j,sum; 2838 PetscInt *cxadj,*cadjncy; 2839 const PetscInt *idxs; 2840 PCBDDCGraph graph; 2841 PetscBT is_on_boundary; 2842 2843 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2844 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2845 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2846 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2847 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2848 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2849 graph->xadj = xadj; 2850 graph->adjncy = adjncy; 2851 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2852 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2853 2854 if (pcbddc->dbg_flag) { 2855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2856 for (i=0;i<graph->ncc;i++) { 2857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2858 } 2859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2860 } 2861 2862 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2863 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2864 for (i=0;i<pcis->n_B;i++) { 2865 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2866 } 2867 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2868 2869 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2870 sum = 0; 2871 for (i=0;i<graph->ncc;i++) { 2872 PetscInt sizecc = 0; 2873 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2874 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2875 sizecc++; 2876 } 2877 } 2878 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2879 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2880 cxadj[graph->queue[j]] = sizecc; 2881 } 2882 } 2883 sum += sizecc*sizecc; 2884 } 2885 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2886 sum = 0; 2887 for (i=0;i<nvtxs;i++) { 2888 PetscInt temp = cxadj[i]; 2889 cxadj[i] = sum; 2890 sum += temp; 2891 } 2892 cxadj[nvtxs] = sum; 2893 for (i=0;i<graph->ncc;i++) { 2894 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2895 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2896 PetscInt k,sizecc = 0; 2897 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2898 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2899 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2900 sizecc++; 2901 } 2902 } 2903 } 2904 } 2905 } 2906 if (nvtxs) { 2907 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2908 } else { 2909 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2910 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2911 } 2912 graph->xadj = 0; 2913 graph->adjncy = 0; 2914 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2915 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2916 } 2917 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2918 if (!flg_row) { 2919 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2920 } 2921 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2922 } 2923 2924 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2925 vertex_size = 1; 2926 if (pcbddc->user_provided_isfordofs) { 2927 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2928 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2929 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2930 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2931 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2932 } 2933 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2934 pcbddc->n_ISForDofs = 0; 2935 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2936 } 2937 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2938 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2939 } else { 2940 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2941 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2942 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2943 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2944 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2945 } 2946 } 2947 } 2948 2949 /* Setup of Graph */ 2950 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2951 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2952 } 2953 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2954 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2955 } 2956 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2957 2958 /* Graph's connected components analysis */ 2959 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2960 2961 /* print some info to stdout */ 2962 if (pcbddc->dbg_flag) { 2963 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2964 } 2965 2966 /* mark topography has done */ 2967 pcbddc->recompute_topography = PETSC_FALSE; 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2973 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2974 { 2975 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2976 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 n = 0; 2981 vertices = 0; 2982 if (pcbddc->ConstraintMatrix) { 2983 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2984 for (i=0;i<local_primal_size;i++) { 2985 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2986 if (size_of_constraint == 1) n++; 2987 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2988 } 2989 if (vertices_idx) { 2990 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2991 n = 0; 2992 for (i=0;i<local_primal_size;i++) { 2993 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2994 if (size_of_constraint == 1) { 2995 vertices[n++]=row_cmat_indices[0]; 2996 } 2997 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2998 } 2999 } 3000 } 3001 *n_vertices = n; 3002 if (vertices_idx) *vertices_idx = vertices; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3008 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3009 { 3010 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3011 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3012 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3013 PetscBT touched; 3014 PetscErrorCode ierr; 3015 3016 /* This function assumes that the number of local constraints per connected component 3017 is not greater than the number of nodes defined for the connected component 3018 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3019 PetscFunctionBegin; 3020 n = 0; 3021 constraints_index = 0; 3022 if (pcbddc->ConstraintMatrix) { 3023 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3024 max_size_of_constraint = 0; 3025 for (i=0;i<local_primal_size;i++) { 3026 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3027 if (size_of_constraint > 1) { 3028 n++; 3029 } 3030 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3031 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3032 } 3033 if (constraints_idx) { 3034 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3035 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3036 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3037 n = 0; 3038 for (i=0;i<local_primal_size;i++) { 3039 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3040 if (size_of_constraint > 1) { 3041 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3042 /* find first untouched local node */ 3043 j = 0; 3044 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3045 min_index = row_cmat_global_indices[j]; 3046 min_loc = j; 3047 /* search the minimum among nodes not yet touched on the connected component 3048 since there can be more than one constraint on a single cc */ 3049 for (j=1;j<size_of_constraint;j++) { 3050 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3051 min_index = row_cmat_global_indices[j]; 3052 min_loc = j; 3053 } 3054 } 3055 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3056 constraints_index[n++] = row_cmat_indices[min_loc]; 3057 } 3058 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3059 } 3060 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3061 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3062 } 3063 } 3064 *n_constraints = n; 3065 if (constraints_idx) *constraints_idx = constraints_index; 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "PCBDDCSubsetNumbering" 3071 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3072 { 3073 Vec local_vec,global_vec; 3074 IS seqis,paris; 3075 VecScatter scatter_ctx; 3076 PetscScalar *array; 3077 PetscInt *temp_global_dofs; 3078 PetscScalar globalsum; 3079 PetscInt i,j,s; 3080 PetscInt nlocals,first_index,old_index,max_local; 3081 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3082 PetscMPIInt *dof_sizes,*dof_displs; 3083 PetscBool first_found; 3084 PetscErrorCode ierr; 3085 3086 PetscFunctionBegin; 3087 /* mpi buffers */ 3088 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3089 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3090 j = ( !rank_prec_comm ? size_prec_comm : 0); 3091 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3092 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3093 /* get maximum size of subset */ 3094 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3095 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3096 max_local = 0; 3097 for (i=0;i<n_local_dofs;i++) { 3098 if (max_local < temp_global_dofs[i] ) { 3099 max_local = temp_global_dofs[i]; 3100 } 3101 } 3102 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3103 max_global++; 3104 max_local = 0; 3105 for (i=0;i<n_local_dofs;i++) { 3106 if (max_local < local_dofs[i] ) { 3107 max_local = local_dofs[i]; 3108 } 3109 } 3110 max_local++; 3111 /* allocate workspace */ 3112 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3113 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3114 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3115 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3116 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3117 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3118 /* create scatter */ 3119 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3120 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3121 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3122 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3123 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3124 /* init array */ 3125 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3126 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3127 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3128 if (local_dofs_mult) { 3129 for (i=0;i<n_local_dofs;i++) { 3130 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3131 } 3132 } else { 3133 for (i=0;i<n_local_dofs;i++) { 3134 array[local_dofs[i]]=1.0; 3135 } 3136 } 3137 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3138 /* scatter into global vec and get total number of global dofs */ 3139 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3140 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3141 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3142 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3143 /* Fill global_vec with cumulative function for global numbering */ 3144 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3145 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3146 nlocals = 0; 3147 first_index = -1; 3148 first_found = PETSC_FALSE; 3149 for (i=0;i<s;i++) { 3150 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3151 first_found = PETSC_TRUE; 3152 first_index = i; 3153 } 3154 nlocals += (PetscInt)PetscRealPart(array[i]); 3155 } 3156 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3157 if (!rank_prec_comm) { 3158 dof_displs[0]=0; 3159 for (i=1;i<size_prec_comm;i++) { 3160 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3161 } 3162 } 3163 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3164 if (first_found) { 3165 array[first_index] += (PetscScalar)nlocals; 3166 old_index = first_index; 3167 for (i=first_index+1;i<s;i++) { 3168 if (PetscRealPart(array[i]) > 0.1) { 3169 array[i] += array[old_index]; 3170 old_index = i; 3171 } 3172 } 3173 } 3174 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3175 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3176 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3177 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3178 /* get global ordering of local dofs */ 3179 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3180 if (local_dofs_mult) { 3181 for (i=0;i<n_local_dofs;i++) { 3182 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3183 } 3184 } else { 3185 for (i=0;i<n_local_dofs;i++) { 3186 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3187 } 3188 } 3189 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3190 /* free workspace */ 3191 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3192 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3193 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3194 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3195 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3196 /* return pointer to global ordering of local dofs */ 3197 *global_numbering_subset = temp_global_dofs; 3198 PetscFunctionReturn(0); 3199 } 3200 3201 #undef __FUNCT__ 3202 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3203 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3204 { 3205 PetscInt i,j; 3206 PetscScalar *alphas; 3207 PetscErrorCode ierr; 3208 3209 PetscFunctionBegin; 3210 /* this implements stabilized Gram-Schmidt */ 3211 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3212 for (i=0;i<n;i++) { 3213 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3214 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3215 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3216 } 3217 ierr = PetscFree(alphas);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "MatISGetSubassemblingPattern" 3223 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3224 { 3225 Mat subdomain_adj; 3226 IS new_ranks,ranks_send_to; 3227 MatPartitioning partitioner; 3228 Mat_IS *matis; 3229 PetscInt n_neighs,*neighs,*n_shared,**shared; 3230 PetscInt prank; 3231 PetscMPIInt size,rank,color; 3232 PetscInt *xadj,*adjncy,*oldranks; 3233 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3234 PetscInt i,local_size,threshold=0; 3235 PetscErrorCode ierr; 3236 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3237 PetscSubcomm subcomm; 3238 3239 PetscFunctionBegin; 3240 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3241 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3242 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3243 3244 /* Get info on mapping */ 3245 matis = (Mat_IS*)(mat->data); 3246 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3247 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3248 3249 /* build local CSR graph of subdomains' connectivity */ 3250 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3251 xadj[0] = 0; 3252 xadj[1] = PetscMax(n_neighs-1,0); 3253 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3254 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3255 3256 if (threshold) { 3257 PetscInt xadj_count = 0; 3258 for (i=1;i<n_neighs;i++) { 3259 if (n_shared[i] > threshold) { 3260 adjncy[xadj_count] = neighs[i]; 3261 adjncy_wgt[xadj_count] = n_shared[i]; 3262 xadj_count++; 3263 } 3264 } 3265 xadj[1] = xadj_count; 3266 } else { 3267 if (xadj[1]) { 3268 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3269 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3270 } 3271 } 3272 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3273 if (use_square) { 3274 for (i=0;i<xadj[1];i++) { 3275 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3276 } 3277 } 3278 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3281 3282 /* 3283 Restrict work on active processes only. 3284 */ 3285 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3286 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3288 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3289 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3290 if (color) { 3291 ierr = PetscFree(xadj);CHKERRQ(ierr); 3292 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3293 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3294 } else { 3295 PetscInt coarsening_ratio; 3296 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3297 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3298 prank = rank; 3299 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3300 /* 3301 for (i=0;i<size;i++) { 3302 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3303 } 3304 */ 3305 for (i=0;i<xadj[1];i++) { 3306 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3307 } 3308 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3309 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3310 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3311 3312 /* Partition */ 3313 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3314 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3315 if (use_vwgt) { 3316 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3317 v_wgt[0] = local_size; 3318 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3319 } 3320 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3321 coarsening_ratio = size/n_subdomains; 3322 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3323 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3324 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3325 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3326 3327 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3328 if (contiguous) { 3329 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3330 } else { 3331 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3332 } 3333 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3334 /* clean up */ 3335 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3336 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3337 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3338 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3339 } 3340 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3341 3342 /* assemble parallel IS for sends */ 3343 i = 1; 3344 if (color) i=0; 3345 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3346 3347 /* get back IS */ 3348 *is_sends = ranks_send_to; 3349 PetscFunctionReturn(0); 3350 } 3351 3352 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatISSubassemble" 3356 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3357 { 3358 Mat local_mat; 3359 Mat_IS *matis; 3360 IS is_sends_internal; 3361 PetscInt rows,cols,new_local_rows; 3362 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3363 PetscBool ismatis,isdense,newisdense,destroy_mat; 3364 ISLocalToGlobalMapping l2gmap; 3365 PetscInt* l2gmap_indices; 3366 const PetscInt* is_indices; 3367 MatType new_local_type; 3368 /* buffers */ 3369 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3370 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3371 PetscInt *recv_buffer_idxs_local; 3372 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3373 /* MPI */ 3374 MPI_Comm comm,comm_n; 3375 PetscSubcomm subcomm; 3376 PetscMPIInt n_sends,n_recvs,commsize; 3377 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3378 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3379 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3380 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3381 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3382 PetscErrorCode ierr; 3383 3384 PetscFunctionBegin; 3385 /* TODO: add missing checks */ 3386 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3387 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3388 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3389 PetscValidLogicalCollectiveInt(mat,nis,7); 3390 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3391 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3392 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3393 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3394 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3395 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3396 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3397 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3398 PetscInt mrows,mcols,mnrows,mncols; 3399 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3400 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3401 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3402 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3403 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3404 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3405 } 3406 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3407 PetscValidLogicalCollectiveInt(mat,bs,0); 3408 /* prepare IS for sending if not provided */ 3409 if (!is_sends) { 3410 PetscBool pcontig = PETSC_TRUE; 3411 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3412 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3413 } else { 3414 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3415 is_sends_internal = is_sends; 3416 } 3417 3418 /* get pointer of MATIS data */ 3419 matis = (Mat_IS*)mat->data; 3420 3421 /* get comm */ 3422 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3423 3424 /* compute number of sends */ 3425 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3426 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3427 3428 /* compute number of receives */ 3429 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3430 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3431 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3432 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3433 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3434 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3435 ierr = PetscFree(iflags);CHKERRQ(ierr); 3436 3437 /* restrict comm if requested */ 3438 subcomm = 0; 3439 destroy_mat = PETSC_FALSE; 3440 if (restrict_comm) { 3441 PetscMPIInt color,subcommsize; 3442 3443 color = 0; 3444 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3445 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3446 subcommsize = commsize - subcommsize; 3447 /* check if reuse has been requested */ 3448 if (reuse == MAT_REUSE_MATRIX) { 3449 if (*mat_n) { 3450 PetscMPIInt subcommsize2; 3451 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3452 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3453 comm_n = PetscObjectComm((PetscObject)*mat_n); 3454 } else { 3455 comm_n = PETSC_COMM_SELF; 3456 } 3457 } else { /* MAT_INITIAL_MATRIX */ 3458 PetscMPIInt rank; 3459 3460 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3461 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3462 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3463 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3464 comm_n = subcomm->comm; 3465 } 3466 /* flag to destroy *mat_n if not significative */ 3467 if (color) destroy_mat = PETSC_TRUE; 3468 } else { 3469 comm_n = comm; 3470 } 3471 3472 /* prepare send/receive buffers */ 3473 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3474 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3475 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3476 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3477 if (nis) { 3478 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3479 } 3480 3481 /* Get data from local matrices */ 3482 if (!isdense) { 3483 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3484 /* TODO: See below some guidelines on how to prepare the local buffers */ 3485 /* 3486 send_buffer_vals should contain the raw values of the local matrix 3487 send_buffer_idxs should contain: 3488 - MatType_PRIVATE type 3489 - PetscInt size_of_l2gmap 3490 - PetscInt global_row_indices[size_of_l2gmap] 3491 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3492 */ 3493 } else { 3494 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3495 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3496 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3497 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3498 send_buffer_idxs[1] = i; 3499 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3500 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3501 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3502 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3503 for (i=0;i<n_sends;i++) { 3504 ilengths_vals[is_indices[i]] = len*len; 3505 ilengths_idxs[is_indices[i]] = len+2; 3506 } 3507 } 3508 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3509 /* additional is (if any) */ 3510 if (nis) { 3511 PetscMPIInt psum; 3512 PetscInt j; 3513 for (j=0,psum=0;j<nis;j++) { 3514 PetscInt plen; 3515 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3516 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3517 psum += len+1; /* indices + lenght */ 3518 } 3519 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3520 for (j=0,psum=0;j<nis;j++) { 3521 PetscInt plen; 3522 const PetscInt *is_array_idxs; 3523 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3524 send_buffer_idxs_is[psum] = plen; 3525 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3526 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3527 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3528 psum += plen+1; /* indices + lenght */ 3529 } 3530 for (i=0;i<n_sends;i++) { 3531 ilengths_idxs_is[is_indices[i]] = psum; 3532 } 3533 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3534 } 3535 3536 buf_size_idxs = 0; 3537 buf_size_vals = 0; 3538 buf_size_idxs_is = 0; 3539 for (i=0;i<n_recvs;i++) { 3540 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3541 buf_size_vals += (PetscInt)olengths_vals[i]; 3542 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3543 } 3544 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3545 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3546 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3547 3548 /* get new tags for clean communications */ 3549 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3550 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3551 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3552 3553 /* allocate for requests */ 3554 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3555 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3556 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3557 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3558 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3559 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3560 3561 /* communications */ 3562 ptr_idxs = recv_buffer_idxs; 3563 ptr_vals = recv_buffer_vals; 3564 ptr_idxs_is = recv_buffer_idxs_is; 3565 for (i=0;i<n_recvs;i++) { 3566 source_dest = onodes[i]; 3567 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3568 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3569 ptr_idxs += olengths_idxs[i]; 3570 ptr_vals += olengths_vals[i]; 3571 if (nis) { 3572 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3573 ptr_idxs_is += olengths_idxs_is[i]; 3574 } 3575 } 3576 for (i=0;i<n_sends;i++) { 3577 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3578 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3579 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3580 if (nis) { 3581 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3582 } 3583 } 3584 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3585 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3586 3587 /* assemble new l2g map */ 3588 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3589 ptr_idxs = recv_buffer_idxs; 3590 new_local_rows = 0; 3591 for (i=0;i<n_recvs;i++) { 3592 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3593 ptr_idxs += olengths_idxs[i]; 3594 } 3595 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3596 ptr_idxs = recv_buffer_idxs; 3597 new_local_rows = 0; 3598 for (i=0;i<n_recvs;i++) { 3599 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3600 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3601 ptr_idxs += olengths_idxs[i]; 3602 } 3603 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3604 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3605 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3606 3607 /* infer new local matrix type from received local matrices type */ 3608 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3609 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3610 if (n_recvs) { 3611 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3612 ptr_idxs = recv_buffer_idxs; 3613 for (i=0;i<n_recvs;i++) { 3614 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3615 new_local_type_private = MATAIJ_PRIVATE; 3616 break; 3617 } 3618 ptr_idxs += olengths_idxs[i]; 3619 } 3620 switch (new_local_type_private) { 3621 case MATDENSE_PRIVATE: 3622 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3623 new_local_type = MATSEQAIJ; 3624 bs = 1; 3625 } else { /* if I receive only 1 dense matrix */ 3626 new_local_type = MATSEQDENSE; 3627 bs = 1; 3628 } 3629 break; 3630 case MATAIJ_PRIVATE: 3631 new_local_type = MATSEQAIJ; 3632 bs = 1; 3633 break; 3634 case MATBAIJ_PRIVATE: 3635 new_local_type = MATSEQBAIJ; 3636 break; 3637 case MATSBAIJ_PRIVATE: 3638 new_local_type = MATSEQSBAIJ; 3639 break; 3640 default: 3641 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3642 break; 3643 } 3644 } else { /* by default, new_local_type is seqdense */ 3645 new_local_type = MATSEQDENSE; 3646 bs = 1; 3647 } 3648 3649 /* create MATIS object if needed */ 3650 if (reuse == MAT_INITIAL_MATRIX) { 3651 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3652 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3653 } else { 3654 /* it also destroys the local matrices */ 3655 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3656 } 3657 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3658 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3659 3660 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3661 3662 /* Global to local map of received indices */ 3663 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3664 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3665 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3666 3667 /* restore attributes -> type of incoming data and its size */ 3668 buf_size_idxs = 0; 3669 for (i=0;i<n_recvs;i++) { 3670 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3671 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3672 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3673 } 3674 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3675 3676 /* set preallocation */ 3677 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3678 if (!newisdense) { 3679 PetscInt *new_local_nnz=0; 3680 3681 ptr_vals = recv_buffer_vals; 3682 ptr_idxs = recv_buffer_idxs_local; 3683 if (n_recvs) { 3684 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3685 } 3686 for (i=0;i<n_recvs;i++) { 3687 PetscInt j; 3688 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3689 for (j=0;j<*(ptr_idxs+1);j++) { 3690 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3691 } 3692 } else { 3693 /* TODO */ 3694 } 3695 ptr_idxs += olengths_idxs[i]; 3696 } 3697 if (new_local_nnz) { 3698 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3699 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3700 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3701 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3702 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3703 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3704 } else { 3705 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3706 } 3707 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3708 } else { 3709 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3710 } 3711 3712 /* set values */ 3713 ptr_vals = recv_buffer_vals; 3714 ptr_idxs = recv_buffer_idxs_local; 3715 for (i=0;i<n_recvs;i++) { 3716 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3717 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3718 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3719 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3720 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3721 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3722 } else { 3723 /* TODO */ 3724 } 3725 ptr_idxs += olengths_idxs[i]; 3726 ptr_vals += olengths_vals[i]; 3727 } 3728 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3729 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3730 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3731 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3732 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3733 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3734 3735 #if 0 3736 if (!restrict_comm) { /* check */ 3737 Vec lvec,rvec; 3738 PetscReal infty_error; 3739 3740 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3741 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3742 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3743 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3744 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3745 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3746 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3747 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3748 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3749 } 3750 #endif 3751 3752 /* assemble new additional is (if any) */ 3753 if (nis) { 3754 PetscInt **temp_idxs,*count_is,j,psum; 3755 3756 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3757 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3758 ptr_idxs = recv_buffer_idxs_is; 3759 psum = 0; 3760 for (i=0;i<n_recvs;i++) { 3761 for (j=0;j<nis;j++) { 3762 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3763 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3764 psum += plen; 3765 ptr_idxs += plen+1; /* shift pointer to received data */ 3766 } 3767 } 3768 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3769 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3770 for (i=1;i<nis;i++) { 3771 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3772 } 3773 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3774 ptr_idxs = recv_buffer_idxs_is; 3775 for (i=0;i<n_recvs;i++) { 3776 for (j=0;j<nis;j++) { 3777 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3778 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3779 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3780 ptr_idxs += plen+1; /* shift pointer to received data */ 3781 } 3782 } 3783 for (i=0;i<nis;i++) { 3784 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3785 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3786 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3787 } 3788 ierr = PetscFree(count_is);CHKERRQ(ierr); 3789 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3790 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3791 } 3792 /* free workspace */ 3793 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3794 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3795 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3796 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3797 if (isdense) { 3798 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3799 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3800 } else { 3801 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3802 } 3803 if (nis) { 3804 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3805 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3806 } 3807 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3808 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3809 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3810 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3811 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3812 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3813 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3814 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3815 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3816 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3817 ierr = PetscFree(onodes);CHKERRQ(ierr); 3818 if (nis) { 3819 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3820 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3821 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3822 } 3823 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3824 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3825 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3826 for (i=0;i<nis;i++) { 3827 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3828 } 3829 } 3830 PetscFunctionReturn(0); 3831 } 3832 3833 /* temporary hack into ksp private data structure */ 3834 #include <petsc-private/kspimpl.h> 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3838 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3839 { 3840 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3841 PC_IS *pcis = (PC_IS*)pc->data; 3842 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3843 MatNullSpace CoarseNullSpace=NULL; 3844 ISLocalToGlobalMapping coarse_islg; 3845 IS coarse_is,*isarray; 3846 PetscInt i,im_active=-1,active_procs=-1; 3847 PetscInt nis,nisdofs,nisneu; 3848 PC pc_temp; 3849 PCType coarse_pc_type; 3850 KSPType coarse_ksp_type; 3851 PetscBool multilevel_requested,multilevel_allowed; 3852 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3853 Mat t_coarse_mat_is; 3854 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3855 PetscMPIInt all_procs; 3856 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3857 PetscBool compute_vecs = PETSC_FALSE; 3858 PetscScalar *array; 3859 PetscErrorCode ierr; 3860 3861 PetscFunctionBegin; 3862 /* Assign global numbering to coarse dofs */ 3863 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3864 compute_vecs = PETSC_TRUE; 3865 PetscInt ocoarse_size; 3866 ocoarse_size = pcbddc->coarse_size; 3867 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3868 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3869 /* see if we can avoid some work */ 3870 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3871 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3872 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3873 coarse_reuse = PETSC_FALSE; 3874 } else { /* we can safely reuse already computed coarse matrix */ 3875 coarse_reuse = PETSC_TRUE; 3876 } 3877 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3878 coarse_reuse = PETSC_FALSE; 3879 } 3880 /* reset any subassembling information */ 3881 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3882 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3883 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3884 coarse_reuse = PETSC_TRUE; 3885 } 3886 3887 /* count "active" (i.e. with positive local size) and "void" processes */ 3888 im_active = !!(pcis->n); 3889 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3890 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3891 void_procs = all_procs-active_procs; 3892 csin_type_simple = PETSC_TRUE; 3893 redist = PETSC_FALSE; 3894 if (pcbddc->current_level && void_procs) { 3895 csin_ml = PETSC_TRUE; 3896 ncoarse_ml = void_procs; 3897 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3898 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3899 csin_ds = PETSC_TRUE; 3900 ncoarse_ds = pcbddc->redistribute_coarse; 3901 redist = PETSC_TRUE; 3902 } else { 3903 csin_ds = PETSC_TRUE; 3904 ncoarse_ds = active_procs; 3905 redist = PETSC_TRUE; 3906 } 3907 } else { 3908 csin_ml = PETSC_FALSE; 3909 ncoarse_ml = all_procs; 3910 if (void_procs) { 3911 csin_ds = PETSC_TRUE; 3912 ncoarse_ds = void_procs; 3913 csin_type_simple = PETSC_FALSE; 3914 } else { 3915 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3916 csin_ds = PETSC_TRUE; 3917 ncoarse_ds = pcbddc->redistribute_coarse; 3918 redist = PETSC_TRUE; 3919 } else { 3920 csin_ds = PETSC_FALSE; 3921 ncoarse_ds = all_procs; 3922 } 3923 } 3924 } 3925 3926 /* 3927 test if we can go multilevel: three conditions must be satisfied: 3928 - we have not exceeded the number of levels requested 3929 - we can actually subassemble the active processes 3930 - we can find a suitable number of MPI processes where we can place the subassembled problem 3931 */ 3932 multilevel_allowed = PETSC_FALSE; 3933 multilevel_requested = PETSC_FALSE; 3934 if (pcbddc->current_level < pcbddc->max_levels) { 3935 multilevel_requested = PETSC_TRUE; 3936 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3937 multilevel_allowed = PETSC_FALSE; 3938 } else { 3939 multilevel_allowed = PETSC_TRUE; 3940 } 3941 } 3942 /* determine number of process partecipating to coarse solver */ 3943 if (multilevel_allowed) { 3944 ncoarse = ncoarse_ml; 3945 csin = csin_ml; 3946 } else { 3947 ncoarse = ncoarse_ds; 3948 csin = csin_ds; 3949 } 3950 3951 /* creates temporary l2gmap and IS for coarse indexes */ 3952 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3953 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3954 3955 /* creates temporary MATIS object for coarse matrix */ 3956 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3957 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3958 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3959 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3960 #if 0 3961 { 3962 PetscViewer viewer; 3963 char filename[256]; 3964 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3965 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3966 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3967 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3968 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3969 } 3970 #endif 3971 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3972 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3973 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3974 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3976 3977 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3978 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3979 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3980 const PetscInt *idxs; 3981 ISLocalToGlobalMapping tmap; 3982 3983 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3984 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3985 /* allocate space for temporary storage */ 3986 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3987 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3988 /* allocate for IS array */ 3989 nisdofs = pcbddc->n_ISForDofsLocal; 3990 nisneu = !!pcbddc->NeumannBoundariesLocal; 3991 nis = nisdofs + nisneu; 3992 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3993 /* dofs splitting */ 3994 for (i=0;i<nisdofs;i++) { 3995 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3996 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3997 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3998 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3999 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4000 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4001 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4002 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4003 } 4004 /* neumann boundaries */ 4005 if (pcbddc->NeumannBoundariesLocal) { 4006 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4007 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4008 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4009 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4010 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4011 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4012 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4013 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4014 } 4015 /* free memory */ 4016 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4017 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4018 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4019 } else { 4020 nis = 0; 4021 nisdofs = 0; 4022 nisneu = 0; 4023 isarray = NULL; 4024 } 4025 /* destroy no longer needed map */ 4026 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4027 4028 /* restrict on coarse candidates (if needed) */ 4029 coarse_mat_is = NULL; 4030 if (csin) { 4031 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4032 if (redist) { 4033 PetscMPIInt rank; 4034 PetscInt spc,n_spc_p1,dest[1],destsize; 4035 4036 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4037 spc = active_procs/pcbddc->redistribute_coarse; 4038 n_spc_p1 = active_procs%pcbddc->redistribute_coarse; 4039 if (im_active) { 4040 destsize = 1; 4041 if (rank > n_spc_p1*(spc+1)-1) { 4042 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4043 } else { 4044 dest[0] = rank/(spc+1); 4045 } 4046 } else { 4047 destsize = 0; 4048 } 4049 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4050 } else if (csin_type_simple) { 4051 PetscMPIInt rank; 4052 PetscInt issize,isidx; 4053 4054 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4055 if (im_active) { 4056 issize = 1; 4057 isidx = (PetscInt)rank; 4058 } else { 4059 issize = 0; 4060 isidx = -1; 4061 } 4062 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4063 } else { /* get a suitable subassembling pattern from MATIS code */ 4064 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4065 } 4066 4067 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4068 if (!redist || ncoarse <= void_procs) { 4069 PetscInt ncoarse_cand,tissize,*nisindices; 4070 PetscInt *coarse_candidates; 4071 const PetscInt* tisindices; 4072 4073 /* get coarse candidates' ranks in pc communicator */ 4074 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4075 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4076 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4077 if (!coarse_candidates[i]) { 4078 coarse_candidates[ncoarse_cand++]=i; 4079 } 4080 } 4081 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4082 4083 4084 if (pcbddc->dbg_flag) { 4085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4087 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4088 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4089 for (i=0;i<ncoarse_cand;i++) { 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4091 } 4092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4094 } 4095 /* shift the pattern on coarse candidates */ 4096 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4097 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4098 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4099 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4100 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4101 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4102 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4103 } 4104 if (pcbddc->dbg_flag) { 4105 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4106 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4107 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4108 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4109 } 4110 } 4111 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4112 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4113 } else { 4114 if (pcbddc->dbg_flag) { 4115 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4118 } 4119 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4120 coarse_mat_is = t_coarse_mat_is; 4121 } 4122 4123 /* create local to global scatters for coarse problem */ 4124 if (compute_vecs) { 4125 PetscInt lrows; 4126 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4127 if (coarse_mat_is) { 4128 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4129 } else { 4130 lrows = 0; 4131 } 4132 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4133 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4134 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4135 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4136 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4137 } 4138 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4139 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4140 4141 /* set defaults for coarse KSP and PC */ 4142 if (multilevel_allowed) { 4143 coarse_ksp_type = KSPRICHARDSON; 4144 coarse_pc_type = PCBDDC; 4145 } else { 4146 coarse_ksp_type = KSPPREONLY; 4147 coarse_pc_type = PCREDUNDANT; 4148 } 4149 4150 /* print some info if requested */ 4151 if (pcbddc->dbg_flag) { 4152 if (!multilevel_allowed) { 4153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4154 if (multilevel_requested) { 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4156 } else if (pcbddc->max_levels) { 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4158 } 4159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 } 4161 } 4162 4163 /* create the coarse KSP object only once with defaults */ 4164 if (coarse_mat_is) { 4165 MatReuse coarse_mat_reuse; 4166 PetscViewer dbg_viewer = NULL; 4167 if (pcbddc->dbg_flag) { 4168 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4169 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4170 } 4171 if (!pcbddc->coarse_ksp) { 4172 char prefix[256],str_level[16]; 4173 size_t len; 4174 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4175 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4176 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4177 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4178 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4179 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4180 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4181 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4182 /* prefix */ 4183 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4184 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4185 if (!pcbddc->current_level) { 4186 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4187 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4188 } else { 4189 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4190 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4191 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4192 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4193 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4194 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4195 } 4196 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4197 /* allow user customization */ 4198 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4199 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4200 } 4201 4202 /* get some info after set from options */ 4203 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4204 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4205 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4206 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4207 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4208 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4209 isbddc = PETSC_FALSE; 4210 } 4211 if (isredundant) { 4212 KSP inner_ksp; 4213 PC inner_pc; 4214 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4215 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4216 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4217 } 4218 4219 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4220 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4221 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4222 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4223 if (nisdofs) { 4224 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4225 for (i=0;i<nisdofs;i++) { 4226 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4227 } 4228 } 4229 if (nisneu) { 4230 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4231 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4232 } 4233 4234 /* assemble coarse matrix */ 4235 if (coarse_reuse) { 4236 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4237 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4238 coarse_mat_reuse = MAT_REUSE_MATRIX; 4239 } else { 4240 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4241 } 4242 if (isbddc || isnn) { 4243 if (pcbddc->coarsening_ratio > 1) { 4244 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4245 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4246 if (pcbddc->dbg_flag) { 4247 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4248 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4249 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4250 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4251 } 4252 } 4253 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4254 } else { 4255 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4256 coarse_mat = coarse_mat_is; 4257 } 4258 } else { 4259 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4260 } 4261 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4262 4263 /* propagate symmetry info to coarse matrix */ 4264 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4265 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4266 4267 /* set operators */ 4268 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4269 if (pcbddc->dbg_flag) { 4270 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4271 } 4272 } else { /* processes non partecipating to coarse solver (if any) */ 4273 coarse_mat = 0; 4274 } 4275 ierr = PetscFree(isarray);CHKERRQ(ierr); 4276 #if 0 4277 { 4278 PetscViewer viewer; 4279 char filename[256]; 4280 sprintf(filename,"coarse_mat.m"); 4281 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4282 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4283 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4284 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4285 } 4286 #endif 4287 4288 /* Compute coarse null space (special handling by BDDC only) */ 4289 if (pcbddc->NullSpace) { 4290 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4291 } 4292 4293 if (pcbddc->coarse_ksp) { 4294 Vec crhs,csol; 4295 PetscBool ispreonly; 4296 if (CoarseNullSpace) { 4297 if (isbddc) { 4298 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4299 } else { 4300 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4301 } 4302 } 4303 /* setup coarse ksp */ 4304 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4305 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4306 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4307 /* hack */ 4308 if (!csol) { 4309 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4310 } 4311 if (!crhs) { 4312 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4313 } 4314 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4315 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4316 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4317 KSP check_ksp; 4318 KSPType check_ksp_type; 4319 PC check_pc; 4320 Vec check_vec,coarse_vec; 4321 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4322 PetscInt its; 4323 PetscBool compute_eigs; 4324 PetscReal *eigs_r,*eigs_c; 4325 PetscInt neigs; 4326 const char *prefix; 4327 4328 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4329 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4330 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4331 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4332 if (ispreonly) { 4333 check_ksp_type = KSPPREONLY; 4334 compute_eigs = PETSC_FALSE; 4335 } else { 4336 check_ksp_type = KSPGMRES; 4337 compute_eigs = PETSC_TRUE; 4338 } 4339 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4340 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4341 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4342 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4343 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4344 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4345 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4346 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4347 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4348 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4349 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4350 /* create random vec */ 4351 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4352 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4353 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4354 if (CoarseNullSpace) { 4355 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4356 } 4357 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4358 /* solve coarse problem */ 4359 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4360 if (CoarseNullSpace) { 4361 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4362 } 4363 /* set eigenvalue estimation if preonly has not been requested */ 4364 if (compute_eigs) { 4365 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4366 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4367 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4368 lambda_max = eigs_r[neigs-1]; 4369 lambda_min = eigs_r[0]; 4370 if (pcbddc->use_coarse_estimates) { 4371 if (lambda_max>lambda_min) { 4372 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4373 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4374 } 4375 } 4376 } 4377 4378 /* check coarse problem residual error */ 4379 if (pcbddc->dbg_flag) { 4380 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4381 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4382 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4383 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4384 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4385 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4386 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4387 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4388 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4389 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4390 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4391 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4392 if (compute_eigs) { 4393 PetscReal lambda_max_s,lambda_min_s; 4394 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4395 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4396 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4397 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4398 for (i=0;i<neigs;i++) { 4399 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4400 } 4401 } 4402 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4403 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4404 } 4405 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4406 if (compute_eigs) { 4407 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4408 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4409 } 4410 } 4411 } 4412 /* print additional info */ 4413 if (pcbddc->dbg_flag) { 4414 /* waits until all processes reaches this point */ 4415 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4418 } 4419 4420 /* free memory */ 4421 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4422 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4423 PetscFunctionReturn(0); 4424 } 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4428 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4429 { 4430 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4431 PC_IS* pcis = (PC_IS*)pc->data; 4432 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4433 PetscInt i,coarse_size; 4434 PetscInt *local_primal_indices; 4435 PetscErrorCode ierr; 4436 4437 PetscFunctionBegin; 4438 /* Compute global number of coarse dofs */ 4439 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4441 } 4442 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4443 4444 /* check numbering */ 4445 if (pcbddc->dbg_flag) { 4446 PetscScalar coarsesum,*array; 4447 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4448 4449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4450 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4451 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4452 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4453 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4454 for (i=0;i<pcbddc->local_primal_size;i++) { 4455 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4456 } 4457 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4458 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4459 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4460 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4461 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4462 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4463 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4464 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4465 for (i=0;i<pcis->n;i++) { 4466 if (array[i] == 1.0) { 4467 set_error = PETSC_TRUE; 4468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4469 } 4470 } 4471 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4472 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4473 for (i=0;i<pcis->n;i++) { 4474 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4475 } 4476 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4477 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4478 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4479 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4480 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4482 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4486 for (i=0;i<pcbddc->local_primal_size;i++) { 4487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4488 } 4489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4490 } 4491 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4492 if (set_error_reduced) { 4493 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4494 } 4495 } 4496 /* get back data */ 4497 *coarse_size_n = coarse_size; 4498 *local_primal_indices_n = local_primal_indices; 4499 PetscFunctionReturn(0); 4500 } 4501 4502 #undef __FUNCT__ 4503 #define __FUNCT__ "PCBDDCGlobalToLocal" 4504 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4505 { 4506 IS localis_t; 4507 PetscInt i,lsize,*idxs,n; 4508 PetscScalar *vals; 4509 PetscErrorCode ierr; 4510 4511 PetscFunctionBegin; 4512 /* get indices in local ordering exploiting local to global map */ 4513 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4515 for (i=0;i<lsize;i++) vals[i] = 1.0; 4516 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4517 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4518 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4519 if (idxs) { /* multilevel guard */ 4520 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4521 } 4522 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4523 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4524 ierr = PetscFree(vals);CHKERRQ(ierr); 4525 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4526 /* now compute set in local ordering */ 4527 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4529 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4530 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4531 for (i=0,lsize=0;i<n;i++) { 4532 if (PetscRealPart(vals[i]) > 0.5) { 4533 lsize++; 4534 } 4535 } 4536 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4537 for (i=0,lsize=0;i<n;i++) { 4538 if (PetscRealPart(vals[i]) > 0.5) { 4539 idxs[lsize++] = i; 4540 } 4541 } 4542 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4543 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4544 *localis = localis_t; 4545 PetscFunctionReturn(0); 4546 } 4547 4548 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4549 #undef __FUNCT__ 4550 #define __FUNCT__ "PCBDDCMatMult_Private" 4551 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4552 { 4553 PCBDDCChange_ctx change_ctx; 4554 PetscErrorCode ierr; 4555 4556 PetscFunctionBegin; 4557 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4558 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4559 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4560 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 #undef __FUNCT__ 4565 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4566 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4567 { 4568 PCBDDCChange_ctx change_ctx; 4569 PetscErrorCode ierr; 4570 4571 PetscFunctionBegin; 4572 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4573 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4574 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4575 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 #undef __FUNCT__ 4580 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4581 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4582 { 4583 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4584 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4585 PetscInt *used_xadj,*used_adjncy; 4586 PetscBool free_used_adj; 4587 PetscErrorCode ierr; 4588 4589 PetscFunctionBegin; 4590 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4591 free_used_adj = PETSC_FALSE; 4592 if (pcbddc->sub_schurs_layers == -1) { 4593 used_xadj = NULL; 4594 used_adjncy = NULL; 4595 } else { 4596 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4597 used_xadj = pcbddc->mat_graph->xadj; 4598 used_adjncy = pcbddc->mat_graph->adjncy; 4599 } else if (pcbddc->computed_rowadj) { 4600 used_xadj = pcbddc->mat_graph->xadj; 4601 used_adjncy = pcbddc->mat_graph->adjncy; 4602 } else { 4603 Mat mat_adj; 4604 PetscBool flg_row=PETSC_TRUE; 4605 const PetscInt *xadj,*adjncy; 4606 PetscInt nvtxs; 4607 4608 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4609 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4610 if (!flg_row) { 4611 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4612 } 4613 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4614 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4615 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4616 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4617 if (!flg_row) { 4618 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4619 } 4620 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4621 free_used_adj = PETSC_TRUE; 4622 } 4623 } 4624 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4625 4626 /* free adjacency */ 4627 if (free_used_adj) { 4628 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4629 } 4630 PetscFunctionReturn(0); 4631 } 4632 4633 #undef __FUNCT__ 4634 #define __FUNCT__ "PCBDDCInitSubSchurs" 4635 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4636 { 4637 PC_IS *pcis=(PC_IS*)pc->data; 4638 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4639 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4640 PCBDDCGraph graph; 4641 Mat S_j; 4642 PetscErrorCode ierr; 4643 4644 PetscFunctionBegin; 4645 /* attach interface graph for determining subsets */ 4646 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4647 IS verticesIS; 4648 4649 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4650 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4651 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4654 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4655 /* 4656 if (pcbddc->dbg_flag) { 4657 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4658 } 4659 */ 4660 } else { 4661 graph = pcbddc->mat_graph; 4662 } 4663 4664 /* Create Schur complement matrix */ 4665 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4666 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4667 4668 /* sub_schurs init */ 4669 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4670 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4671 /* free graph struct */ 4672 if (pcbddc->sub_schurs_rebuild) { 4673 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4674 } 4675 PetscFunctionReturn(0); 4676 } 4677