1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 cum += subset_size*n; 66 cum2 += n; 67 maxneigs = PetscMax(maxneigs,n); 68 } 69 } 70 71 if (mss) { 72 if (symmetric) { 73 PetscBLASInt B_itype = 1; 74 PetscBLASInt B_N = mss; 75 PetscScalar zero = 0.0; 76 PetscScalar eps = 0.0; /* dlamch? */ 77 78 B_lwork = -1; 79 S = NULL; 80 St = NULL; 81 eigs = NULL; 82 eigv = NULL; 83 B_iwork = NULL; 84 B_ifail = NULL; 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 #if defined(PETSC_USE_COMPLEX) 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 88 #else 89 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 90 #endif 91 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 } else { 94 /* TODO */ 95 } 96 } else { 97 lwork = 0; 98 } 99 100 nv = 0; 101 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 102 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 103 } 104 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 105 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 106 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 107 #if defined(PETSC_USE_COMPLEX) 108 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 109 #endif 110 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 111 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 112 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 113 nv+cum,&pcbddc->adaptive_constraints_idxs, 114 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 115 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 116 117 maxneigs = 0; 118 cum = cum2 = cumarray = 0; 119 if (sub_schurs->is_Ej_com) { 120 const PetscInt *idxs; 121 122 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 123 for (cum=0;cum<nv;cum++) { 124 pcbddc->adaptive_constraints_n[cum] = 1; 125 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 126 pcbddc->adaptive_constraints_ptrs[cum] = cum; 127 pcbddc->adaptive_constraints_data[cum] = 1.0; 128 } 129 cum2 = cum; 130 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 131 } 132 133 if (mss) { /* multilevel */ 134 if (pcbddc->use_deluxe_scaling) { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 136 } else { 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 138 } 139 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 140 } 141 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt j,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 147 const PetscInt *idxs; 148 PetscScalar one = 1.0,zero=0.0; 149 PetscBLASInt B_N; 150 151 /* S should be copied since we need it for deluxe scaling */ 152 if (symmetric) { 153 PetscInt j; 154 for (j=0;j<subset_size;j++) { 155 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 156 } 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 } else { 161 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 } 164 /* is always this the right matrix? */ 165 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 167 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 168 /* St = Starray+cumarray; */ 169 170 if (symmetric) { 171 PetscBLASInt B_itype = 1; 172 PetscBLASInt B_IL = 1, B_IU; 173 PetscScalar eps = -1.0; /* dlamch? */ 174 PetscInt nmin_s; 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 nmin_s = PetscMin(nmin,B_N); 202 if (B_neigs < nmin_s) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 251 252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 253 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 #if 0 256 PetscInt ii; 257 for (ii=0;ii<B_neigs;ii++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 259 for (j=0;j<B_N;j++) { 260 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 261 } 262 } 263 #endif 264 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 265 for (j=0;j<B_neigs;j++) { 266 #if 0 267 { 268 PetscBLASInt Blas_N,Blas_one = 1.0; 269 PetscScalar norm; 270 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 272 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 273 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } else { 275 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 276 } 277 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 278 } 279 #endif 280 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 281 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 282 cum2 += subset_size; 283 } 284 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 } 286 /* shift for next computation */ 287 cumarray += subset_size*subset_size; 288 } 289 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 290 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 291 292 if (mss) { 293 if (pcbddc->use_deluxe_scaling) { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 295 } else { 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 297 } 298 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 299 } 300 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 301 #if defined(PETSC_USE_COMPLEX) 302 ierr = PetscFree(rwork);CHKERRQ(ierr); 303 #endif 304 if (pcbddc->dbg_flag) { 305 PetscInt maxneigs_r; 306 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 307 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "PCBDDCSetUpSolvers" 314 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 315 { 316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 317 PetscScalar *coarse_submat_vals; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 /* Setup local scatters R_to_B and (optionally) R_to_D */ 322 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 323 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 324 325 /* Setup local neumann solver ksp_R */ 326 /* PCBDDCSetUpLocalScatters should be called first! */ 327 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 328 329 /* Change global null space passed in by the user if change of basis has been requested */ 330 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 331 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 332 } 333 334 /* 335 Setup local correction and local part of coarse basis. 336 Gives back the dense local part of the coarse matrix in column major ordering 337 */ 338 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 339 340 /* Compute total number of coarse nodes and setup coarse solver */ 341 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 342 343 /* free */ 344 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "PCBDDCResetCustomization" 350 PetscErrorCode PCBDDCResetCustomization(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 358 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 361 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 362 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 363 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 364 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetTopography" 372 PetscErrorCode PCBDDCResetTopography(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 380 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 381 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCBDDCResetSolvers" 388 PetscErrorCode PCBDDCResetSolvers(PC pc) 389 { 390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 391 PetscScalar *array; 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 396 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 652 /* Get submatrices from subdomain matrix */ 653 if (n_vertices) { 654 Mat newmat; 655 IS is_aux; 656 PetscInt ibs,mbs; 657 PetscBool issbaij; 658 659 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 660 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 661 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 662 if (ibs != mbs) { /* need to convert to SEQAIJ */ 663 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 666 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 667 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 668 } else { 669 /* this is safe */ 670 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 671 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 672 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 673 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 674 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 675 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 676 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 740 PetscScalar *x,*y; 741 PetscBLASInt B_N,B_one = 1; 742 743 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 745 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 746 if (F) { 747 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 748 } else { 749 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 750 for (i=0;i<n_vertices;i++) { 751 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 752 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 753 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 754 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 756 } 757 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 758 } 759 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 760 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 761 if (n_constraints) { 762 Mat B; 763 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 765 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 766 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 767 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 768 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 769 ierr = MatDestroy(&B);CHKERRQ(ierr); 770 } 771 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 772 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 773 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 774 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 775 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 777 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 778 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 779 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 780 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 781 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 782 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 783 /* coarse basis functions */ 784 for (i=0;i<n_vertices;i++) { 785 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 786 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 787 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 788 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 y[n_B*i+idx_V_B[i]] = 1.0; 791 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 792 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 793 794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 795 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 796 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 797 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 800 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 801 } 802 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 803 } 804 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 805 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 806 } 807 808 if (n_constraints) { 809 Mat B; 810 811 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 812 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 813 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 814 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 815 if (n_vertices) { 816 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&B);CHKERRQ(ierr); 819 /* coarse basis functions */ 820 for (i=0;i<n_constraints;i++) { 821 PetscScalar *y; 822 823 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 824 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 825 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 826 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 831 if (pcbddc->switch_static || pcbddc->dbg_flag) { 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 837 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 838 } 839 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 840 } 841 } 842 843 /* compute other basis functions for non-symmetric problems */ 844 if (!pcbddc->issym) { 845 Mat B,X; 846 847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 848 849 if (n_constraints) { 850 Mat S_VCT,S_CCT,B_C; 851 852 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 853 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 854 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 855 if (n_vertices) { 856 Mat B_V; 857 858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 859 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 860 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 861 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 862 } 863 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 864 } 865 if (n_vertices) { 866 Mat A_VRT; 867 PetscBLASInt B_N,B_one = 1; 868 869 if (!n_constraints) { /* if there are no constraints, reset work */ 870 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 871 } 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 873 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 874 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 875 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 876 } 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 879 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 880 for (i=0;i<pcbddc->local_primal_size;i++) { 881 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 882 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 883 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 885 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 886 } 887 } else { 888 for (i=0;i<pcbddc->local_primal_size;i++) { 889 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 890 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 891 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 892 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 893 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 894 } 895 } 896 ierr = MatDestroy(&B);CHKERRQ(ierr); 897 /* coarse basis functions */ 898 for (i=0;i<pcbddc->local_primal_size;i++) { 899 PetscScalar *y; 900 901 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 902 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 903 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 904 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 if (i<n_vertices) { 907 y[n_B*i+idx_V_B[i]] = 1.0; 908 } 909 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 912 if (pcbddc->switch_static || pcbddc->dbg_flag) { 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 918 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 919 } 920 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 921 } 922 ierr = MatDestroy(&X);CHKERRQ(ierr); 923 } 924 925 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 926 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 927 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 928 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 929 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 930 /* Checking coarse_sub_mat and coarse basis functios */ 931 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 932 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 933 if (pcbddc->dbg_flag) { 934 Mat coarse_sub_mat; 935 Mat AUXMAT,TM1,TM2,TM3,TM4; 936 Mat coarse_phi_D,coarse_phi_B; 937 Mat coarse_psi_D,coarse_psi_B; 938 Mat A_II,A_BB,A_IB,A_BI; 939 MatType checkmattype=MATSEQAIJ; 940 PetscReal real_value; 941 942 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 943 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 944 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 945 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 946 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 947 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 948 if (unsymmetric_check) { 949 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 950 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 951 } 952 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 953 954 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 955 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 956 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 957 if (unsymmetric_check) { 958 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 959 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 960 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 961 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 962 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 963 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 964 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 965 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 966 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 967 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 968 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 969 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 970 } else { 971 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 972 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 973 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 974 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 975 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 976 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 977 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 978 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 979 } 980 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 981 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 982 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 983 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 984 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 985 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 986 987 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 988 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 990 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 991 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 992 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 993 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 994 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 995 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 996 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 997 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 998 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 999 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1000 if (unsymmetric_check) { 1001 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1002 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1003 } 1004 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1005 } 1006 1007 /* free memory */ 1008 ierr = PetscFree(work);CHKERRQ(ierr); 1009 if (n_vertices) { 1010 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1011 } 1012 if (n_constraints) { 1013 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1014 } 1015 /* get back data */ 1016 *coarse_submat_vals_n = coarse_submat_vals; 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1022 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1023 { 1024 Mat *work_mat; 1025 IS isrow_s,iscol_s; 1026 PetscBool rsorted,csorted; 1027 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1028 PetscErrorCode ierr; 1029 1030 PetscFunctionBegin; 1031 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1032 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1033 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1034 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1035 1036 if (!rsorted) { 1037 const PetscInt *idxs; 1038 PetscInt *idxs_sorted,i; 1039 1040 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1041 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1042 for (i=0;i<rsize;i++) { 1043 idxs_perm_r[i] = i; 1044 } 1045 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1046 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1047 for (i=0;i<rsize;i++) { 1048 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1049 } 1050 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1051 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1052 } else { 1053 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1054 isrow_s = isrow; 1055 } 1056 1057 if (!csorted) { 1058 if (isrow == iscol) { 1059 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1060 iscol_s = isrow_s; 1061 } else { 1062 const PetscInt *idxs; 1063 PetscInt *idxs_sorted,i; 1064 1065 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1066 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1067 for (i=0;i<csize;i++) { 1068 idxs_perm_c[i] = i; 1069 } 1070 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1071 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1072 for (i=0;i<csize;i++) { 1073 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1074 } 1075 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1076 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1077 } 1078 } else { 1079 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1080 iscol_s = iscol; 1081 } 1082 1083 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1084 1085 if (!rsorted || !csorted) { 1086 Mat new_mat; 1087 IS is_perm_r,is_perm_c; 1088 1089 if (!rsorted) { 1090 PetscInt *idxs_r,i; 1091 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1092 for (i=0;i<rsize;i++) { 1093 idxs_r[idxs_perm_r[i]] = i; 1094 } 1095 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1096 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1097 } else { 1098 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1099 } 1100 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1101 1102 if (!csorted) { 1103 if (isrow_s == iscol_s) { 1104 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1105 is_perm_c = is_perm_r; 1106 } else { 1107 PetscInt *idxs_c,i; 1108 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1109 for (i=0;i<csize;i++) { 1110 idxs_c[idxs_perm_c[i]] = i; 1111 } 1112 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1113 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1114 } 1115 } else { 1116 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1117 } 1118 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1119 1120 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1121 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1122 work_mat[0] = new_mat; 1123 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1124 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1125 } 1126 1127 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1128 *B = work_mat[0]; 1129 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1130 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1131 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1137 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1138 { 1139 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1140 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1141 Mat new_mat; 1142 IS is_local,is_global; 1143 PetscInt local_size; 1144 PetscBool isseqaij; 1145 PetscErrorCode ierr; 1146 1147 PetscFunctionBegin; 1148 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1149 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1150 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1151 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1152 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1153 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1154 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1155 1156 /* check */ 1157 if (pcbddc->dbg_flag) { 1158 Vec x,x_change; 1159 PetscReal error; 1160 1161 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1162 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1163 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1164 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1167 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1169 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1170 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1173 ierr = VecDestroy(&x);CHKERRQ(ierr); 1174 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1175 } 1176 1177 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1178 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1179 if (isseqaij) { 1180 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1181 } else { 1182 Mat work_mat; 1183 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1184 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1185 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1186 } 1187 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1188 /* 1189 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1190 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1191 */ 1192 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 1196 #undef __FUNCT__ 1197 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1198 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1199 { 1200 PC_IS* pcis = (PC_IS*)(pc->data); 1201 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1202 IS is_aux1,is_aux2; 1203 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1204 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1205 PetscInt vbs,bs; 1206 PetscBT bitmask; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 /* 1211 No need to setup local scatters if 1212 - primal space is unchanged 1213 AND 1214 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1215 AND 1216 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1217 */ 1218 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1219 PetscFunctionReturn(0); 1220 } 1221 /* destroy old objects */ 1222 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1223 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1224 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1225 /* Set Non-overlapping dimensions */ 1226 n_B = pcis->n_B; n_D = pcis->n - n_B; 1227 n_vertices = pcbddc->n_actual_vertices; 1228 /* create auxiliary bitmask */ 1229 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1230 for (i=0;i<n_vertices;i++) { 1231 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1232 } 1233 1234 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1235 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1236 for (i=0, n_R=0; i<pcis->n; i++) { 1237 if (!PetscBTLookup(bitmask,i)) { 1238 idx_R_local[n_R] = i; 1239 n_R++; 1240 } 1241 } 1242 1243 /* Block code */ 1244 vbs = 1; 1245 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1246 if (bs>1 && !(n_vertices%bs)) { 1247 PetscBool is_blocked = PETSC_TRUE; 1248 PetscInt *vary; 1249 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1250 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1251 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1252 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1253 for (i=0; i<n_vertices; i++) { 1254 if (vary[i]!=0 && vary[i]!=bs) { 1255 is_blocked = PETSC_FALSE; 1256 break; 1257 } 1258 } 1259 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1260 vbs = bs; 1261 for (i=0;i<n_R/vbs;i++) { 1262 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1263 } 1264 } 1265 ierr = PetscFree(vary);CHKERRQ(ierr); 1266 } 1267 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1268 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1269 1270 /* print some info if requested */ 1271 if (pcbddc->dbg_flag) { 1272 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1273 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1274 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1276 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1277 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1280 } 1281 1282 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1283 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1284 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1285 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1286 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1287 for (i=0; i<n_D; i++) { 1288 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1289 } 1290 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1291 for (i=0, j=0; i<n_R; i++) { 1292 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1293 aux_array1[j++] = i; 1294 } 1295 } 1296 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0, j=0; i<n_B; i++) { 1299 if (!PetscBTLookup(bitmask,is_indices[i])) { 1300 aux_array2[j++] = i; 1301 } 1302 } 1303 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1304 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1305 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1306 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1307 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1308 1309 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1310 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1311 for (i=0, j=0; i<n_R; i++) { 1312 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1313 aux_array1[j++] = i; 1314 } 1315 } 1316 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1317 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1319 } 1320 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1321 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1328 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1329 { 1330 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1331 PC_IS *pcis = (PC_IS*)pc->data; 1332 PC pc_temp; 1333 Mat A_RR; 1334 MatReuse reuse; 1335 PetscScalar m_one = -1.0; 1336 PetscReal value; 1337 PetscInt n_D,n_R,ibs,mbs; 1338 PetscBool use_exact,use_exact_reduced,issbaij; 1339 PetscErrorCode ierr; 1340 /* prefixes stuff */ 1341 char dir_prefix[256],neu_prefix[256],str_level[16]; 1342 size_t len; 1343 1344 PetscFunctionBegin; 1345 1346 /* compute prefixes */ 1347 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1348 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1349 if (!pcbddc->current_level) { 1350 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1351 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1352 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1353 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1356 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1357 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1358 len -= 15; /* remove "pc_bddc_coarse_" */ 1359 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1360 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1361 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1362 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1366 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1367 } 1368 1369 /* DIRICHLET PROBLEM */ 1370 if (dirichlet) { 1371 /* Matrix for Dirichlet problem is pcis->A_II */ 1372 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1373 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1374 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1375 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1376 /* default */ 1377 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1378 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1379 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1380 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1381 if (issbaij) { 1382 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1383 } else { 1384 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1385 } 1386 /* Allow user's customization */ 1387 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1388 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1389 } 1390 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1391 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1392 if (!n_D) { 1393 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1394 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1395 } 1396 /* Set Up KSP for Dirichlet problem of BDDC */ 1397 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1398 /* set ksp_D into pcis data */ 1399 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1400 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1401 pcis->ksp_D = pcbddc->ksp_D; 1402 } 1403 1404 /* NEUMANN PROBLEM */ 1405 A_RR = 0; 1406 if (neumann) { 1407 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1408 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1409 if (pcbddc->ksp_R) { /* already created ksp */ 1410 PetscInt nn_R; 1411 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1412 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1413 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1414 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1415 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1416 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1417 reuse = MAT_INITIAL_MATRIX; 1418 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1419 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1420 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1421 reuse = MAT_INITIAL_MATRIX; 1422 } else { /* safe to reuse the matrix */ 1423 reuse = MAT_REUSE_MATRIX; 1424 } 1425 } 1426 /* last check */ 1427 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1428 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1429 reuse = MAT_INITIAL_MATRIX; 1430 } 1431 } else { /* first time, so we need to create the matrix */ 1432 reuse = MAT_INITIAL_MATRIX; 1433 } 1434 /* extract A_RR */ 1435 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1436 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1437 if (ibs != mbs) { 1438 Mat newmat; 1439 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1440 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1441 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1442 } else { 1443 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1444 } 1445 if (!pcbddc->ksp_R) { /* create object if not present */ 1446 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1447 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1448 /* default */ 1449 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1450 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1451 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1453 if (issbaij) { 1454 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1455 } else { 1456 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1457 } 1458 /* Allow user's customization */ 1459 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1460 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1461 } 1462 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1463 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1464 if (!n_R) { 1465 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1466 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1467 } 1468 /* Set Up KSP for Neumann problem of BDDC */ 1469 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1470 } 1471 1472 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1473 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1474 if (pcbddc->dbg_flag) { 1475 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1476 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1478 } 1479 if (dirichlet) { /* Dirichlet */ 1480 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1481 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1482 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1483 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1484 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1485 /* need to be adapted? */ 1486 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1487 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1488 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1489 /* print info */ 1490 if (pcbddc->dbg_flag) { 1491 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1493 } 1494 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1495 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1496 } 1497 } 1498 if (neumann) { /* Neumann */ 1499 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1500 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1502 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1503 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1504 /* need to be adapted? */ 1505 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1506 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1507 /* print info */ 1508 if (pcbddc->dbg_flag) { 1509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1511 } 1512 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1513 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1514 } 1515 } 1516 } 1517 /* free Neumann problem's matrix */ 1518 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1519 PetscFunctionReturn(0); 1520 } 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1524 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1525 { 1526 PetscErrorCode ierr; 1527 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1528 1529 PetscFunctionBegin; 1530 if (applytranspose) { 1531 if (pcbddc->local_auxmat1) { 1532 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1533 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1534 } 1535 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1536 } else { 1537 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1538 if (pcbddc->local_auxmat1) { 1539 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1540 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1549 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1550 { 1551 PetscErrorCode ierr; 1552 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1553 PC_IS* pcis = (PC_IS*) (pc->data); 1554 const PetscScalar zero = 0.0; 1555 1556 PetscFunctionBegin; 1557 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1558 if (applytranspose) { 1559 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1560 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1561 } else { 1562 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1563 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1564 } 1565 /* start communications from local primal nodes to rhs of coarse solver */ 1566 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1567 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1571 /* TODO remove null space when doing multilevel */ 1572 if (pcbddc->coarse_ksp) { 1573 if (applytranspose) { 1574 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1575 } else { 1576 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1577 } 1578 } 1579 1580 /* Local solution on R nodes */ 1581 if (pcis->n) { 1582 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1583 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1585 if (pcbddc->switch_static) { 1586 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1588 } 1589 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1590 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1591 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 if (pcbddc->switch_static) { 1594 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 } 1597 } 1598 1599 /* communications from coarse sol to local primal nodes */ 1600 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1601 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1602 1603 /* Sum contributions from two levels */ 1604 if (applytranspose) { 1605 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1606 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1607 } else { 1608 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1609 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1617 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1618 { 1619 PetscErrorCode ierr; 1620 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1621 PetscScalar *array,*array2; 1622 Vec from,to; 1623 1624 PetscFunctionBegin; 1625 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1626 from = pcbddc->coarse_vec; 1627 to = pcbddc->vec1_P; 1628 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1629 Vec tvec; 1630 PetscInt lsize; 1631 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1632 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1633 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1634 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1635 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1636 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1637 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1638 } 1639 } else { /* from local to global -> put data in coarse right hand side */ 1640 from = pcbddc->vec1_P; 1641 to = pcbddc->coarse_vec; 1642 } 1643 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1649 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1650 { 1651 PetscErrorCode ierr; 1652 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1653 PetscScalar *array,*array2; 1654 Vec from,to; 1655 1656 PetscFunctionBegin; 1657 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1658 from = pcbddc->coarse_vec; 1659 to = pcbddc->vec1_P; 1660 } else { /* from local to global -> put data in coarse right hand side */ 1661 from = pcbddc->vec1_P; 1662 to = pcbddc->coarse_vec; 1663 } 1664 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1665 if (smode == SCATTER_FORWARD) { 1666 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1667 Vec tvec; 1668 PetscInt lsize; 1669 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1670 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1671 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1672 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1673 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1674 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1675 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1676 } 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 /* uncomment for testing purposes */ 1682 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1683 #undef __FUNCT__ 1684 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1685 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1686 { 1687 PetscErrorCode ierr; 1688 PC_IS* pcis = (PC_IS*)(pc->data); 1689 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1690 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1691 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1692 MatType impMatType=MATSEQAIJ; 1693 /* one and zero */ 1694 PetscScalar one=1.0,zero=0.0; 1695 /* space to store constraints and their local indices */ 1696 PetscScalar *temp_quadrature_constraint; 1697 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1698 /* iterators */ 1699 PetscInt i,j,k,total_counts,temp_start_ptr; 1700 /* stuff to store connected components stored in pcbddc->mat_graph */ 1701 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1702 PetscInt n_ISForFaces,n_ISForEdges; 1703 /* near null space stuff */ 1704 MatNullSpace nearnullsp; 1705 const Vec *nearnullvecs; 1706 Vec *localnearnullsp; 1707 PetscBool nnsp_has_cnst; 1708 PetscInt nnsp_size; 1709 PetscScalar *array; 1710 /* BLAS integers */ 1711 PetscBLASInt lwork,lierr; 1712 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1713 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1714 /* LAPACK working arrays for SVD or POD */ 1715 PetscBool skip_lapack; 1716 PetscScalar *work; 1717 PetscReal *singular_vals; 1718 #if defined(PETSC_USE_COMPLEX) 1719 PetscReal *rwork; 1720 #endif 1721 #if defined(PETSC_MISSING_LAPACK_GESVD) 1722 PetscBLASInt Blas_one_2=1; 1723 PetscScalar *temp_basis,*correlation_mat; 1724 #else 1725 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1726 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1727 #endif 1728 /* reuse */ 1729 PetscInt olocal_primal_size; 1730 PetscInt *oprimal_indices_local_idxs; 1731 /* change of basis */ 1732 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1733 PetscBool boolforchange,qr_needed; 1734 PetscBT touched,change_basis,qr_needed_idx; 1735 /* auxiliary stuff */ 1736 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1737 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1738 PetscScalar *temp_quadrature_constraint_work=NULL; 1739 /* some quantities */ 1740 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1741 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1742 1743 1744 PetscFunctionBegin; 1745 /* Destroy Mat objects computed previously */ 1746 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1747 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1748 /* TODO synch with adaptive selection */ 1749 /* Get index sets for faces, edges and vertices from graph */ 1750 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1751 /* free unneeded index sets */ 1752 if (!pcbddc->use_vertices) { 1753 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1754 } 1755 if (!pcbddc->use_edges) { 1756 for (i=0;i<n_ISForEdges;i++) { 1757 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1758 } 1759 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1760 n_ISForEdges = 0; 1761 } 1762 if (!pcbddc->use_faces) { 1763 for (i=0;i<n_ISForFaces;i++) { 1764 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1765 } 1766 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1767 n_ISForFaces = 0; 1768 } 1769 /* HACKS (the following two blocks of code) */ 1770 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1771 pcbddc->use_change_of_basis = PETSC_TRUE; 1772 if (!ISForEdges) { 1773 pcbddc->use_change_on_faces = PETSC_TRUE; 1774 } 1775 } 1776 if (pcbddc->NullSpace) { 1777 /* use_change_of_basis should be consistent among processors */ 1778 PetscBool tbool[2],gbool[2]; 1779 tbool [0] = pcbddc->use_change_of_basis; 1780 tbool [1] = pcbddc->use_change_on_faces; 1781 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1782 pcbddc->use_change_of_basis = gbool[0]; 1783 pcbddc->use_change_on_faces = gbool[1]; 1784 } 1785 /* print some info */ 1786 if (pcbddc->dbg_flag) { 1787 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1789 i = 0; 1790 if (ISForVertices) { 1791 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1792 } 1793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1796 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1797 } 1798 1799 if (!pcbddc->adaptive_selection) { 1800 /* check if near null space is attached to global mat */ 1801 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1802 if (nearnullsp) { 1803 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1804 /* remove any stored info */ 1805 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1806 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1807 /* store information for BDDC solver reuse */ 1808 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1809 pcbddc->onearnullspace = nearnullsp; 1810 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1811 for (i=0;i<nnsp_size;i++) { 1812 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1813 } 1814 } else { /* if near null space is not provided BDDC uses constants by default */ 1815 nnsp_size = 0; 1816 nnsp_has_cnst = PETSC_TRUE; 1817 } 1818 /* get max number of constraints on a single cc */ 1819 max_constraints = nnsp_size; 1820 if (nnsp_has_cnst) max_constraints++; 1821 1822 /* 1823 Evaluate maximum storage size needed by the procedure 1824 - temp_indices will contain start index of each constraint stored as follows 1825 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1826 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1827 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1828 */ 1829 total_counts = n_ISForFaces+n_ISForEdges; 1830 total_counts *= max_constraints; 1831 n_vertices = 0; 1832 if (ISForVertices) { 1833 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1834 } 1835 total_counts += n_vertices; 1836 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1837 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1838 total_counts = 0; 1839 max_size_of_constraint = 0; 1840 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1841 if (i<n_ISForEdges) { 1842 used_IS = &ISForEdges[i]; 1843 } else { 1844 used_IS = &ISForFaces[i-n_ISForEdges]; 1845 } 1846 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1847 total_counts += j; 1848 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1849 } 1850 total_counts *= max_constraints; 1851 total_counts += n_vertices; 1852 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1853 /* get local part of global near null space vectors */ 1854 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1855 for (k=0;k<nnsp_size;k++) { 1856 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 } 1860 1861 /* whether or not to skip lapack calls */ 1862 skip_lapack = PETSC_TRUE; 1863 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1864 1865 /* allocate some auxiliary stuff */ 1866 if (!skip_lapack || pcbddc->use_qr_single) { 1867 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1868 } 1869 1870 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1871 if (!skip_lapack) { 1872 PetscScalar temp_work; 1873 1874 #if defined(PETSC_MISSING_LAPACK_GESVD) 1875 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1876 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1877 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1878 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1879 #if defined(PETSC_USE_COMPLEX) 1880 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1881 #endif 1882 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1883 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1884 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1885 lwork = -1; 1886 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1887 #if !defined(PETSC_USE_COMPLEX) 1888 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1889 #else 1890 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1891 #endif 1892 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1893 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1894 #else /* on missing GESVD */ 1895 /* SVD */ 1896 PetscInt max_n,min_n; 1897 max_n = max_size_of_constraint; 1898 min_n = max_constraints; 1899 if (max_size_of_constraint < max_constraints) { 1900 min_n = max_size_of_constraint; 1901 max_n = max_constraints; 1902 } 1903 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1904 #if defined(PETSC_USE_COMPLEX) 1905 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1906 #endif 1907 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1908 lwork = -1; 1909 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1910 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1911 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1912 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1913 #if !defined(PETSC_USE_COMPLEX) 1914 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1915 #else 1916 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1917 #endif 1918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1919 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1920 #endif /* on missing GESVD */ 1921 /* Allocate optimal workspace */ 1922 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1923 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1924 } 1925 /* Now we can loop on constraining sets */ 1926 total_counts = 0; 1927 temp_indices[0] = 0; 1928 /* vertices */ 1929 if (ISForVertices) { 1930 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1931 if (nnsp_has_cnst) { /* consider all vertices */ 1932 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (i=0;i<n_vertices;i++) { 1934 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1935 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1936 total_counts++; 1937 } 1938 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1939 PetscBool used_vertex; 1940 for (i=0;i<n_vertices;i++) { 1941 used_vertex = PETSC_FALSE; 1942 k = 0; 1943 while (!used_vertex && k<nnsp_size) { 1944 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1945 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1946 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1947 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1948 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1949 total_counts++; 1950 used_vertex = PETSC_TRUE; 1951 } 1952 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1953 k++; 1954 } 1955 } 1956 } 1957 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1958 n_vertices = total_counts; 1959 } 1960 1961 /* edges and faces */ 1962 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1963 if (ncc<n_ISForEdges) { 1964 used_IS = &ISForEdges[ncc]; 1965 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1966 } else { 1967 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1968 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1969 } 1970 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1971 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1972 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1973 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1974 /* change of basis should not be performed on local periodic nodes */ 1975 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1976 if (nnsp_has_cnst) { 1977 PetscScalar quad_value; 1978 temp_constraints++; 1979 if (!pcbddc->use_nnsp_true) { 1980 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1981 } else { 1982 quad_value = 1.0; 1983 } 1984 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1985 for (j=0;j<size_of_constraint;j++) { 1986 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1987 } 1988 /* sort by global ordering if using lapack subroutines (not needed!) */ 1989 if (!skip_lapack || pcbddc->use_qr_single) { 1990 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1991 for (j=0;j<size_of_constraint;j++) { 1992 permutation[j]=j; 1993 } 1994 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1995 for (j=0;j<size_of_constraint;j++) { 1996 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 1997 } 1998 for (j=0;j<size_of_constraint;j++) { 1999 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2000 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2001 } 2002 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2003 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2004 } 2005 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2006 total_counts++; 2007 } 2008 for (k=0;k<nnsp_size;k++) { 2009 PetscReal real_value; 2010 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2011 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2012 for (j=0;j<size_of_constraint;j++) { 2013 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2014 } 2015 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2016 /* check if array is null on the connected component */ 2017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2018 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2019 if (real_value > 0.0) { /* keep indices and values */ 2020 /* sort by global ordering if using lapack subroutines */ 2021 if (!skip_lapack || pcbddc->use_qr_single) { 2022 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2023 for (j=0;j<size_of_constraint;j++) { 2024 permutation[j]=j; 2025 } 2026 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2027 for (j=0;j<size_of_constraint;j++) { 2028 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2029 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2030 } 2031 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2032 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2033 } 2034 temp_constraints++; 2035 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2036 total_counts++; 2037 } 2038 } 2039 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2040 valid_constraints = temp_constraints; 2041 if (!pcbddc->use_nnsp_true && temp_constraints) { 2042 if (temp_constraints == 1) { /* just normalize the constraint */ 2043 PetscScalar norm; 2044 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2045 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2046 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2047 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2048 } else { /* perform SVD */ 2049 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2050 2051 #if defined(PETSC_MISSING_LAPACK_GESVD) 2052 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2053 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2054 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2055 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2056 from that computed using LAPACKgesvd 2057 -> This is due to a different computation of eigenvectors in LAPACKheev 2058 -> The quality of the POD-computed basis will be the same */ 2059 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2060 /* Store upper triangular part of correlation matrix */ 2061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2062 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2063 for (j=0;j<temp_constraints;j++) { 2064 for (k=0;k<j+1;k++) { 2065 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2066 } 2067 } 2068 /* compute eigenvalues and eigenvectors of correlation matrix */ 2069 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2070 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2071 #if !defined(PETSC_USE_COMPLEX) 2072 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2073 #else 2074 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2075 #endif 2076 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2077 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2078 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2079 j = 0; 2080 while (j < temp_constraints && singular_vals[j] < tol) j++; 2081 total_counts = total_counts-j; 2082 valid_constraints = temp_constraints-j; 2083 /* scale and copy POD basis into used quadrature memory */ 2084 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2090 if (j<temp_constraints) { 2091 PetscInt ii; 2092 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2093 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2094 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2096 for (k=0;k<temp_constraints-j;k++) { 2097 for (ii=0;ii<size_of_constraint;ii++) { 2098 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2099 } 2100 } 2101 } 2102 #else /* on missing GESVD */ 2103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2104 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2107 #if !defined(PETSC_USE_COMPLEX) 2108 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2109 #else 2110 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2111 #endif 2112 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2113 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2114 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2115 k = temp_constraints; 2116 if (k > size_of_constraint) k = size_of_constraint; 2117 j = 0; 2118 while (j < k && singular_vals[k-j-1] < tol) j++; 2119 valid_constraints = k-j; 2120 total_counts = total_counts-temp_constraints+valid_constraints; 2121 #endif /* on missing GESVD */ 2122 } 2123 } 2124 /* setting change_of_basis flag is safe now */ 2125 if (boolforchange) { 2126 for (j=0;j<valid_constraints;j++) { 2127 PetscBTSet(change_basis,total_counts-j-1); 2128 } 2129 } 2130 } 2131 /* free workspace */ 2132 if (!skip_lapack || pcbddc->use_qr_single) { 2133 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2134 } 2135 if (!skip_lapack) { 2136 ierr = PetscFree(work);CHKERRQ(ierr); 2137 #if defined(PETSC_USE_COMPLEX) 2138 ierr = PetscFree(rwork);CHKERRQ(ierr); 2139 #endif 2140 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2141 #if defined(PETSC_MISSING_LAPACK_GESVD) 2142 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2143 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2144 #endif 2145 } 2146 for (k=0;k<nnsp_size;k++) { 2147 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2148 } 2149 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2150 } else { 2151 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2152 PetscInt cum = 0; 2153 2154 total_counts = 0; 2155 n_vertices = 0; 2156 if (sub_schurs->is_Ej_com) { 2157 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2158 } 2159 max_constraints = 0; 2160 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2161 total_counts += pcbddc->adaptive_constraints_n[i]; 2162 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2163 } 2164 temp_indices = pcbddc->adaptive_constraints_ptrs; 2165 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2166 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2167 2168 #if 0 2169 printf("Found %d totals\n",total_counts); 2170 for (i=0;i<total_counts;i++) { 2171 printf("const %d, start %d",i,temp_indices[i]); 2172 printf(" end %d:\n",temp_indices[i+1]); 2173 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2174 printf(" idxs %d",temp_indices_to_constraint[j]); 2175 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2176 } 2177 } 2178 for (i=0;i<n_vertices;i++) { 2179 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2180 } 2181 for (i=0;i<sub_schurs->n_subs;i++) { 2182 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2183 } 2184 #endif 2185 2186 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2187 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2188 /* Change of basis */ 2189 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2190 if (pcbddc->use_change_of_basis) { 2191 cum = n_vertices; 2192 for (i=0;i<sub_schurs->n_subs;i++) { 2193 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2194 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2195 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2196 } 2197 } 2198 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2199 } 2200 } 2201 } 2202 2203 /* free index sets of faces, edges and vertices */ 2204 for (i=0;i<n_ISForFaces;i++) { 2205 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2206 } 2207 if (n_ISForFaces) { 2208 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2209 } 2210 for (i=0;i<n_ISForEdges;i++) { 2211 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2212 } 2213 if (n_ISForEdges) { 2214 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2215 } 2216 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2217 2218 /* map temp_indices_to_constraint in boundary numbering */ 2219 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2220 if (i != temp_indices[total_counts]) { 2221 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2222 } 2223 2224 /* set quantities in pcbddc data structure and store previous primal size */ 2225 /* n_vertices defines the number of subdomain corners in the primal space */ 2226 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2227 olocal_primal_size = pcbddc->local_primal_size; 2228 pcbddc->local_primal_size = total_counts; 2229 pcbddc->n_vertices = n_vertices; 2230 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2231 2232 /* Create constraint matrix */ 2233 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2234 /* so we need to set it up properly either with or without change of basis */ 2235 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2236 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 2237 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2238 /* array to compute a local numbering of constraints : vertices first then constraints */ 2239 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2240 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2241 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2242 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2243 /* auxiliary stuff for basis change */ 2244 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2245 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2246 2247 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2248 total_primal_vertices=0; 2249 for (i=0;i<pcbddc->local_primal_size;i++) { 2250 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2251 if (size_of_constraint == 1) { 2252 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2253 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2254 aux_primal_minloc[total_primal_vertices]=0; 2255 total_primal_vertices++; 2256 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2257 PetscInt min_loc,min_index; 2258 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2259 /* find first untouched local node */ 2260 k = 0; 2261 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2262 min_index = global_indices[k]; 2263 min_loc = k; 2264 /* search the minimum among global nodes already untouched on the cc */ 2265 for (k=1;k<size_of_constraint;k++) { 2266 /* there can be more than one constraint on a single connected component */ 2267 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2268 min_index = global_indices[k]; 2269 min_loc = k; 2270 } 2271 } 2272 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2273 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2274 aux_primal_minloc[total_primal_vertices]=min_loc; 2275 total_primal_vertices++; 2276 } 2277 } 2278 /* determine if a QR strategy is needed for change of basis */ 2279 qr_needed = PETSC_FALSE; 2280 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2281 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2282 if (PetscBTLookup(change_basis,i)) { 2283 if (!pcbddc->use_qr_single) { 2284 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2285 j = 0; 2286 for (k=0;k<size_of_constraint;k++) { 2287 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2288 j++; 2289 } 2290 } 2291 /* found more than one primal dof on the cc */ 2292 if (j > 1) { 2293 PetscBTSet(qr_needed_idx,i); 2294 qr_needed = PETSC_TRUE; 2295 } 2296 } else { 2297 PetscBTSet(qr_needed_idx,i); 2298 qr_needed = PETSC_TRUE; 2299 } 2300 } 2301 } 2302 /* free workspace */ 2303 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2304 2305 /* permute indices in order to have a sorted set of vertices */ 2306 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2307 2308 /* nonzero structure of constraint matrix */ 2309 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2310 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2311 j=total_primal_vertices; 2312 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2313 if (!PetscBTLookup(change_basis,i)) { 2314 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2315 j++; 2316 } 2317 } 2318 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2319 ierr = PetscFree(nnz);CHKERRQ(ierr); 2320 /* set values in constraint matrix */ 2321 for (i=0;i<total_primal_vertices;i++) { 2322 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2323 } 2324 total_counts = total_primal_vertices; 2325 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2326 if (!PetscBTLookup(change_basis,i)) { 2327 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2328 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2329 total_counts++; 2330 } 2331 } 2332 /* assembling */ 2333 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2334 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2335 /* 2336 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2337 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2338 */ 2339 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2340 if (pcbddc->use_change_of_basis) { 2341 /* dual and primal dofs on a single cc */ 2342 PetscInt dual_dofs,primal_dofs; 2343 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2344 PetscInt primal_counter; 2345 /* working stuff for GEQRF */ 2346 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2347 PetscBLASInt lqr_work; 2348 /* working stuff for UNGQR */ 2349 PetscScalar *gqr_work,lgqr_work_t; 2350 PetscBLASInt lgqr_work; 2351 /* working stuff for TRTRS */ 2352 PetscScalar *trs_rhs; 2353 PetscBLASInt Blas_NRHS; 2354 /* pointers for values insertion into change of basis matrix */ 2355 PetscInt *start_rows,*start_cols; 2356 PetscScalar *start_vals; 2357 /* working stuff for values insertion */ 2358 PetscBT is_primal; 2359 /* matrix sizes */ 2360 PetscInt global_size,local_size; 2361 /* temporary change of basis */ 2362 Mat localChangeOfBasisMatrix; 2363 2364 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2365 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2366 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2367 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2368 2369 /* nonzeros for local mat */ 2370 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2371 for (i=0;i<pcis->n;i++) nnz[i]=1; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (PetscBTLookup(change_basis,i)) { 2374 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2375 if (PetscBTLookup(qr_needed_idx,i)) { 2376 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2377 } else { 2378 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2379 /* get local primal index on the cc */ 2380 j = 0; 2381 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2382 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2383 } 2384 } 2385 } 2386 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2387 ierr = PetscFree(nnz);CHKERRQ(ierr); 2388 /* Set initial identity in the matrix */ 2389 for (i=0;i<pcis->n;i++) { 2390 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2391 } 2392 2393 if (pcbddc->dbg_flag) { 2394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2396 } 2397 2398 2399 /* Now we loop on the constraints which need a change of basis */ 2400 /* 2401 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2402 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2403 2404 Basic blocks of change of basis matrix T computed by 2405 2406 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2407 2408 | 1 0 ... 0 s_1/S | 2409 | 0 1 ... 0 s_2/S | 2410 | ... | 2411 | 0 ... 1 s_{n-1}/S | 2412 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2413 2414 with S = \sum_{i=1}^n s_i^2 2415 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2416 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2417 2418 - QR decomposition of constraints otherwise 2419 */ 2420 if (qr_needed) { 2421 /* space to store Q */ 2422 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2423 /* first we issue queries for optimal work */ 2424 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2425 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2426 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2427 lqr_work = -1; 2428 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2430 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2431 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2432 lgqr_work = -1; 2433 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2434 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2435 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2436 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2437 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2438 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2439 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2440 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2441 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2442 /* array to store scaling factors for reflectors */ 2443 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2444 /* array to store rhs and solution of triangular solver */ 2445 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2446 /* allocating workspace for check */ 2447 if (pcbddc->dbg_flag) { 2448 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2449 } 2450 } 2451 /* array to store whether a node is primal or not */ 2452 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2453 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2454 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2455 if (i != total_primal_vertices) { 2456 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2457 } 2458 for (i=0;i<total_primal_vertices;i++) { 2459 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2460 } 2461 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2462 2463 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2464 /* -> using implicit ordering contained in temp_indices data */ 2465 total_counts = pcbddc->n_vertices; 2466 primal_counter = total_counts; 2467 while (total_counts<pcbddc->local_primal_size) { 2468 primal_dofs = 1; 2469 if (PetscBTLookup(change_basis,total_counts)) { 2470 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2471 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2472 primal_dofs++; 2473 } 2474 /* get constraint info */ 2475 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2476 dual_dofs = size_of_constraint-primal_dofs; 2477 2478 if (pcbddc->dbg_flag) { 2479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2480 } 2481 2482 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2483 2484 /* copy quadrature constraints for change of basis check */ 2485 if (pcbddc->dbg_flag) { 2486 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2487 } 2488 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2489 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2490 2491 /* compute QR decomposition of constraints */ 2492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2493 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2496 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2498 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2499 2500 /* explictly compute R^-T */ 2501 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2502 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2503 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2504 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2506 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2508 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2511 2512 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2515 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2518 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2519 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2520 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2521 2522 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2523 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2524 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2529 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2532 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2534 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2535 2536 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2537 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2538 /* insert cols for primal dofs */ 2539 for (j=0;j<primal_dofs;j++) { 2540 start_vals = &qr_basis[j*size_of_constraint]; 2541 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2542 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2543 } 2544 /* insert cols for dual dofs */ 2545 for (j=0,k=0;j<dual_dofs;k++) { 2546 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2547 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2548 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2549 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2550 j++; 2551 } 2552 } 2553 2554 /* check change of basis */ 2555 if (pcbddc->dbg_flag) { 2556 PetscInt ii,jj; 2557 PetscBool valid_qr=PETSC_TRUE; 2558 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2564 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2565 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2566 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2567 for (jj=0;jj<size_of_constraint;jj++) { 2568 for (ii=0;ii<primal_dofs;ii++) { 2569 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2570 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2571 } 2572 } 2573 if (!valid_qr) { 2574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2575 for (jj=0;jj<size_of_constraint;jj++) { 2576 for (ii=0;ii<primal_dofs;ii++) { 2577 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2578 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2579 } 2580 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2581 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2582 } 2583 } 2584 } 2585 } else { 2586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2587 } 2588 } 2589 } else { /* simple transformation block */ 2590 PetscInt row,col; 2591 PetscScalar val,norm; 2592 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2594 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2595 for (j=0;j<size_of_constraint;j++) { 2596 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2597 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2598 if (!PetscBTLookup(is_primal,row_B)) { 2599 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2600 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2601 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2602 } else { 2603 for (k=0;k<size_of_constraint;k++) { 2604 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2605 if (row != col) { 2606 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2607 } else { 2608 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2609 } 2610 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2611 } 2612 } 2613 } 2614 if (pcbddc->dbg_flag) { 2615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2616 } 2617 } 2618 /* increment primal counter */ 2619 primal_counter += primal_dofs; 2620 } else { 2621 if (pcbddc->dbg_flag) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2623 } 2624 } 2625 /* increment constraint counter total_counts */ 2626 total_counts += primal_dofs; 2627 } 2628 2629 /* free workspace */ 2630 if (qr_needed) { 2631 if (pcbddc->dbg_flag) { 2632 ierr = PetscFree(work);CHKERRQ(ierr); 2633 } 2634 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2635 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2636 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2637 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2638 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2639 } 2640 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2641 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2642 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2643 2644 /* assembling of global change of variable */ 2645 { 2646 Mat tmat; 2647 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2648 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2649 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2650 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2651 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2652 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2653 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2654 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2655 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2656 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2657 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2658 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2659 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2660 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2661 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2662 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2663 } 2664 /* check */ 2665 if (pcbddc->dbg_flag) { 2666 PetscReal error; 2667 Vec x,x_change; 2668 2669 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2670 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2671 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2672 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2673 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2674 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2675 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2676 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2677 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2678 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2679 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2680 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2682 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2683 ierr = VecDestroy(&x);CHKERRQ(ierr); 2684 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2685 } 2686 2687 /* adapt sub_schurs computed (if any) */ 2688 if (pcbddc->use_deluxe_scaling) { 2689 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2690 if (sub_schurs->n_subs_par_g) { 2691 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2692 } 2693 if (sub_schurs->S_Ej_all) { 2694 Mat S_1,S_2,tmat; 2695 IS is_all_N; 2696 2697 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2698 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2699 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2700 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2701 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2702 sub_schurs->S_Ej_all = S_1; 2703 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2704 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2705 sub_schurs->sum_S_Ej_all = S_2; 2706 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2707 } 2708 } 2709 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2710 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2711 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2712 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2713 } 2714 2715 /* set up change of basis context */ 2716 if (pcbddc->ChangeOfBasisMatrix) { 2717 PCBDDCChange_ctx change_ctx; 2718 2719 if (!pcbddc->new_global_mat) { 2720 PetscInt global_size,local_size; 2721 2722 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2723 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2724 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2725 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2726 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2727 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2728 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2729 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2730 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2731 } else { 2732 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2733 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2734 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2735 } 2736 if (!pcbddc->user_ChangeOfBasisMatrix) { 2737 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2738 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2739 } else { 2740 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2741 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2742 } 2743 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2744 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2745 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2746 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2747 } 2748 2749 /* get indices in local ordering for vertices and constraints */ 2750 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2751 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2752 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2753 } 2754 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2755 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2756 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2757 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2760 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2761 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2762 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2763 /* set quantities in PCBDDC data struct */ 2764 pcbddc->n_actual_vertices = i; 2765 /* check if a new primal space has been introduced */ 2766 pcbddc->new_primal_space_local = PETSC_TRUE; 2767 if (olocal_primal_size == pcbddc->local_primal_size) { 2768 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2769 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2770 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2771 } 2772 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2773 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2774 2775 /* flush dbg viewer */ 2776 if (pcbddc->dbg_flag) { 2777 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2778 } 2779 2780 /* free workspace */ 2781 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2782 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2783 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2784 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2785 if (!pcbddc->adaptive_selection) { 2786 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2787 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2788 } else { 2789 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2790 pcbddc->adaptive_constraints_ptrs, 2791 pcbddc->adaptive_constraints_idxs, 2792 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2793 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2794 } 2795 PetscFunctionReturn(0); 2796 } 2797 2798 #undef __FUNCT__ 2799 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2800 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2801 { 2802 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2803 PC_IS *pcis = (PC_IS*)pc->data; 2804 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2805 PetscInt ierr,i,vertex_size; 2806 PetscViewer viewer=pcbddc->dbg_viewer; 2807 2808 PetscFunctionBegin; 2809 /* Reset previously computed graph */ 2810 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2811 /* Init local Graph struct */ 2812 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2813 2814 /* Check validity of the csr graph passed in by the user */ 2815 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2816 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2817 } 2818 2819 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2820 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2821 Mat mat_adj; 2822 PetscInt *xadj,*adjncy; 2823 PetscInt nvtxs; 2824 PetscBool flg_row=PETSC_TRUE; 2825 2826 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2827 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2828 if (!flg_row) { 2829 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2830 } 2831 if (pcbddc->use_local_adj) { 2832 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2833 pcbddc->computed_rowadj = PETSC_TRUE; 2834 } else { /* just compute subdomain's connected components */ 2835 IS is_dummy; 2836 ISLocalToGlobalMapping l2gmap_dummy; 2837 PetscInt j,sum; 2838 PetscInt *cxadj,*cadjncy; 2839 const PetscInt *idxs; 2840 PCBDDCGraph graph; 2841 PetscBT is_on_boundary; 2842 2843 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2844 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2845 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2846 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2847 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2848 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2849 graph->xadj = xadj; 2850 graph->adjncy = adjncy; 2851 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2852 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2853 2854 if (pcbddc->dbg_flag) { 2855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2856 for (i=0;i<graph->ncc;i++) { 2857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2858 } 2859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2860 } 2861 2862 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2863 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2864 for (i=0;i<pcis->n_B;i++) { 2865 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2866 } 2867 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2868 2869 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2870 sum = 0; 2871 for (i=0;i<graph->ncc;i++) { 2872 PetscInt sizecc = 0; 2873 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2874 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2875 sizecc++; 2876 } 2877 } 2878 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2879 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2880 cxadj[graph->queue[j]] = sizecc; 2881 } 2882 } 2883 sum += sizecc*sizecc; 2884 } 2885 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2886 sum = 0; 2887 for (i=0;i<nvtxs;i++) { 2888 PetscInt temp = cxadj[i]; 2889 cxadj[i] = sum; 2890 sum += temp; 2891 } 2892 cxadj[nvtxs] = sum; 2893 for (i=0;i<graph->ncc;i++) { 2894 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2895 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2896 PetscInt k,sizecc = 0; 2897 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2898 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2899 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2900 sizecc++; 2901 } 2902 } 2903 } 2904 } 2905 } 2906 if (nvtxs) { 2907 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2908 } else { 2909 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2910 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2911 } 2912 graph->xadj = 0; 2913 graph->adjncy = 0; 2914 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2915 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2916 } 2917 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2918 if (!flg_row) { 2919 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2920 } 2921 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2922 } 2923 2924 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2925 vertex_size = 1; 2926 if (pcbddc->user_provided_isfordofs) { 2927 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2928 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2929 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2930 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2931 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2932 } 2933 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2934 pcbddc->n_ISForDofs = 0; 2935 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2936 } 2937 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2938 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2939 } else { 2940 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2941 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2942 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2943 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2944 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2945 } 2946 } 2947 } 2948 2949 /* Setup of Graph */ 2950 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2951 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2952 } 2953 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2954 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2955 } 2956 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2957 2958 /* Graph's connected components analysis */ 2959 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2960 2961 /* print some info to stdout */ 2962 if (pcbddc->dbg_flag) { 2963 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2964 } 2965 2966 /* mark topography has done */ 2967 pcbddc->recompute_topography = PETSC_FALSE; 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2973 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2974 { 2975 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2976 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2977 PetscErrorCode ierr; 2978 2979 PetscFunctionBegin; 2980 n = 0; 2981 vertices = 0; 2982 if (pcbddc->ConstraintMatrix) { 2983 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2984 for (i=0;i<local_primal_size;i++) { 2985 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2986 if (size_of_constraint == 1) n++; 2987 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2988 } 2989 if (vertices_idx) { 2990 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2991 n = 0; 2992 for (i=0;i<local_primal_size;i++) { 2993 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2994 if (size_of_constraint == 1) { 2995 vertices[n++]=row_cmat_indices[0]; 2996 } 2997 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2998 } 2999 } 3000 } 3001 *n_vertices = n; 3002 if (vertices_idx) *vertices_idx = vertices; 3003 PetscFunctionReturn(0); 3004 } 3005 3006 #undef __FUNCT__ 3007 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3008 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3009 { 3010 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3011 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3012 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3013 PetscBT touched; 3014 PetscErrorCode ierr; 3015 3016 /* This function assumes that the number of local constraints per connected component 3017 is not greater than the number of nodes defined for the connected component 3018 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3019 PetscFunctionBegin; 3020 n = 0; 3021 constraints_index = 0; 3022 if (pcbddc->ConstraintMatrix) { 3023 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3024 max_size_of_constraint = 0; 3025 for (i=0;i<local_primal_size;i++) { 3026 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3027 if (size_of_constraint > 1) { 3028 n++; 3029 } 3030 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3031 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3032 } 3033 if (constraints_idx) { 3034 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3035 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3036 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3037 n = 0; 3038 for (i=0;i<local_primal_size;i++) { 3039 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3040 if (size_of_constraint > 1) { 3041 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3042 /* find first untouched local node */ 3043 j = 0; 3044 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3045 min_index = row_cmat_global_indices[j]; 3046 min_loc = j; 3047 /* search the minimum among nodes not yet touched on the connected component 3048 since there can be more than one constraint on a single cc */ 3049 for (j=1;j<size_of_constraint;j++) { 3050 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3051 min_index = row_cmat_global_indices[j]; 3052 min_loc = j; 3053 } 3054 } 3055 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3056 constraints_index[n++] = row_cmat_indices[min_loc]; 3057 } 3058 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3059 } 3060 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3061 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3062 } 3063 } 3064 *n_constraints = n; 3065 if (constraints_idx) *constraints_idx = constraints_index; 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "PCBDDCSubsetNumbering" 3071 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3072 { 3073 Vec local_vec,global_vec; 3074 IS seqis,paris; 3075 VecScatter scatter_ctx; 3076 PetscScalar *array; 3077 PetscInt *temp_global_dofs; 3078 PetscScalar globalsum; 3079 PetscInt i,j,s; 3080 PetscInt nlocals,first_index,old_index,max_local; 3081 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3082 PetscMPIInt *dof_sizes,*dof_displs; 3083 PetscBool first_found; 3084 PetscErrorCode ierr; 3085 3086 PetscFunctionBegin; 3087 /* mpi buffers */ 3088 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3089 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3090 j = ( !rank_prec_comm ? size_prec_comm : 0); 3091 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3092 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3093 /* get maximum size of subset */ 3094 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3095 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3096 max_local = 0; 3097 for (i=0;i<n_local_dofs;i++) { 3098 if (max_local < temp_global_dofs[i] ) { 3099 max_local = temp_global_dofs[i]; 3100 } 3101 } 3102 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3103 max_global++; 3104 max_local = 0; 3105 for (i=0;i<n_local_dofs;i++) { 3106 if (max_local < local_dofs[i] ) { 3107 max_local = local_dofs[i]; 3108 } 3109 } 3110 max_local++; 3111 /* allocate workspace */ 3112 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3113 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3114 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3115 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3116 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3117 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3118 /* create scatter */ 3119 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3120 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3121 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3122 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3123 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3124 /* init array */ 3125 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3126 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3127 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3128 if (local_dofs_mult) { 3129 for (i=0;i<n_local_dofs;i++) { 3130 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3131 } 3132 } else { 3133 for (i=0;i<n_local_dofs;i++) { 3134 array[local_dofs[i]]=1.0; 3135 } 3136 } 3137 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3138 /* scatter into global vec and get total number of global dofs */ 3139 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3140 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3141 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3142 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3143 /* Fill global_vec with cumulative function for global numbering */ 3144 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3145 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3146 nlocals = 0; 3147 first_index = -1; 3148 first_found = PETSC_FALSE; 3149 for (i=0;i<s;i++) { 3150 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3151 first_found = PETSC_TRUE; 3152 first_index = i; 3153 } 3154 nlocals += (PetscInt)PetscRealPart(array[i]); 3155 } 3156 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3157 if (!rank_prec_comm) { 3158 dof_displs[0]=0; 3159 for (i=1;i<size_prec_comm;i++) { 3160 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3161 } 3162 } 3163 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3164 if (first_found) { 3165 array[first_index] += (PetscScalar)nlocals; 3166 old_index = first_index; 3167 for (i=first_index+1;i<s;i++) { 3168 if (PetscRealPart(array[i]) > 0.1) { 3169 array[i] += array[old_index]; 3170 old_index = i; 3171 } 3172 } 3173 } 3174 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3175 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3176 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3177 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3178 /* get global ordering of local dofs */ 3179 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3180 if (local_dofs_mult) { 3181 for (i=0;i<n_local_dofs;i++) { 3182 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3183 } 3184 } else { 3185 for (i=0;i<n_local_dofs;i++) { 3186 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3187 } 3188 } 3189 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3190 /* free workspace */ 3191 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3192 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3193 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3194 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3195 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3196 /* return pointer to global ordering of local dofs */ 3197 *global_numbering_subset = temp_global_dofs; 3198 PetscFunctionReturn(0); 3199 } 3200 3201 #undef __FUNCT__ 3202 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3203 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3204 { 3205 PetscInt i,j; 3206 PetscScalar *alphas; 3207 PetscErrorCode ierr; 3208 3209 PetscFunctionBegin; 3210 /* this implements stabilized Gram-Schmidt */ 3211 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3212 for (i=0;i<n;i++) { 3213 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3214 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3215 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3216 } 3217 ierr = PetscFree(alphas);CHKERRQ(ierr); 3218 PetscFunctionReturn(0); 3219 } 3220 3221 #undef __FUNCT__ 3222 #define __FUNCT__ "MatISGetSubassemblingPattern" 3223 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3224 { 3225 Mat subdomain_adj; 3226 IS new_ranks,ranks_send_to; 3227 MatPartitioning partitioner; 3228 Mat_IS *matis; 3229 PetscInt n_neighs,*neighs,*n_shared,**shared; 3230 PetscInt prank; 3231 PetscMPIInt size,rank,color; 3232 PetscInt *xadj,*adjncy,*oldranks; 3233 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3234 PetscInt i,local_size,threshold=0; 3235 PetscErrorCode ierr; 3236 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3237 PetscSubcomm subcomm; 3238 3239 PetscFunctionBegin; 3240 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3241 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3242 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3243 3244 /* Get info on mapping */ 3245 matis = (Mat_IS*)(mat->data); 3246 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3247 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3248 3249 /* build local CSR graph of subdomains' connectivity */ 3250 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3251 xadj[0] = 0; 3252 xadj[1] = PetscMax(n_neighs-1,0); 3253 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3254 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3255 3256 if (threshold) { 3257 PetscInt xadj_count = 0; 3258 for (i=1;i<n_neighs;i++) { 3259 if (n_shared[i] > threshold) { 3260 adjncy[xadj_count] = neighs[i]; 3261 adjncy_wgt[xadj_count] = n_shared[i]; 3262 xadj_count++; 3263 } 3264 } 3265 xadj[1] = xadj_count; 3266 } else { 3267 if (xadj[1]) { 3268 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3269 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3270 } 3271 } 3272 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3273 if (use_square) { 3274 for (i=0;i<xadj[1];i++) { 3275 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3276 } 3277 } 3278 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3279 3280 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3281 3282 /* 3283 Restrict work on active processes only. 3284 */ 3285 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3286 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3287 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3288 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3289 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3290 if (color) { 3291 ierr = PetscFree(xadj);CHKERRQ(ierr); 3292 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3293 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3294 } else { 3295 PetscInt coarsening_ratio; 3296 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3297 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3298 prank = rank; 3299 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3300 /* 3301 for (i=0;i<size;i++) { 3302 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3303 } 3304 */ 3305 for (i=0;i<xadj[1];i++) { 3306 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3307 } 3308 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3309 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3310 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3311 3312 /* Partition */ 3313 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3314 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3315 if (use_vwgt) { 3316 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3317 v_wgt[0] = local_size; 3318 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3319 } 3320 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3321 coarsening_ratio = size/n_subdomains; 3322 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3323 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3324 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3325 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3326 3327 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3328 if (contiguous) { 3329 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3330 } else { 3331 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3332 } 3333 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3334 /* clean up */ 3335 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3336 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3337 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3338 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3339 } 3340 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3341 3342 /* assemble parallel IS for sends */ 3343 i = 1; 3344 if (color) i=0; 3345 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3346 3347 /* get back IS */ 3348 *is_sends = ranks_send_to; 3349 PetscFunctionReturn(0); 3350 } 3351 3352 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3353 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatISSubassemble" 3356 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3357 { 3358 Mat local_mat; 3359 Mat_IS *matis; 3360 IS is_sends_internal; 3361 PetscInt rows,cols,new_local_rows; 3362 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3363 PetscBool ismatis,isdense,newisdense,destroy_mat; 3364 ISLocalToGlobalMapping l2gmap; 3365 PetscInt* l2gmap_indices; 3366 const PetscInt* is_indices; 3367 MatType new_local_type; 3368 /* buffers */ 3369 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3370 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3371 PetscInt *recv_buffer_idxs_local; 3372 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3373 /* MPI */ 3374 MPI_Comm comm,comm_n; 3375 PetscSubcomm subcomm; 3376 PetscMPIInt n_sends,n_recvs,commsize; 3377 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3378 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3379 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3380 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3381 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3382 PetscErrorCode ierr; 3383 3384 PetscFunctionBegin; 3385 /* TODO: add missing checks */ 3386 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3387 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3388 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3389 PetscValidLogicalCollectiveInt(mat,nis,7); 3390 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3391 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3392 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3393 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3394 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3395 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3396 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3397 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3398 PetscInt mrows,mcols,mnrows,mncols; 3399 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3400 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3401 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3402 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3403 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3404 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3405 } 3406 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3407 PetscValidLogicalCollectiveInt(mat,bs,0); 3408 /* prepare IS for sending if not provided */ 3409 if (!is_sends) { 3410 PetscBool pcontig = PETSC_TRUE; 3411 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3412 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3413 } else { 3414 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3415 is_sends_internal = is_sends; 3416 } 3417 3418 /* get pointer of MATIS data */ 3419 matis = (Mat_IS*)mat->data; 3420 3421 /* get comm */ 3422 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3423 3424 /* compute number of sends */ 3425 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3426 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3427 3428 /* compute number of receives */ 3429 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3430 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3431 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3432 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3433 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3434 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3435 ierr = PetscFree(iflags);CHKERRQ(ierr); 3436 3437 /* restrict comm if requested */ 3438 subcomm = 0; 3439 destroy_mat = PETSC_FALSE; 3440 if (restrict_comm) { 3441 PetscMPIInt color,rank,subcommsize; 3442 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3443 color = 0; 3444 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3445 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3446 subcommsize = commsize - subcommsize; 3447 /* check if reuse has been requested */ 3448 if (reuse == MAT_REUSE_MATRIX) { 3449 if (*mat_n) { 3450 PetscMPIInt subcommsize2; 3451 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3452 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3453 comm_n = PetscObjectComm((PetscObject)*mat_n); 3454 } else { 3455 comm_n = PETSC_COMM_SELF; 3456 } 3457 } else { /* MAT_INITIAL_MATRIX */ 3458 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3459 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3460 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3461 comm_n = subcomm->comm; 3462 } 3463 /* flag to destroy *mat_n if not significative */ 3464 if (color) destroy_mat = PETSC_TRUE; 3465 } else { 3466 comm_n = comm; 3467 } 3468 3469 /* prepare send/receive buffers */ 3470 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3471 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3472 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3473 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3474 if (nis) { 3475 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3476 } 3477 3478 /* Get data from local matrices */ 3479 if (!isdense) { 3480 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3481 /* TODO: See below some guidelines on how to prepare the local buffers */ 3482 /* 3483 send_buffer_vals should contain the raw values of the local matrix 3484 send_buffer_idxs should contain: 3485 - MatType_PRIVATE type 3486 - PetscInt size_of_l2gmap 3487 - PetscInt global_row_indices[size_of_l2gmap] 3488 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3489 */ 3490 } else { 3491 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3492 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3493 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3494 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3495 send_buffer_idxs[1] = i; 3496 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3497 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3498 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3499 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3500 for (i=0;i<n_sends;i++) { 3501 ilengths_vals[is_indices[i]] = len*len; 3502 ilengths_idxs[is_indices[i]] = len+2; 3503 } 3504 } 3505 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3506 /* additional is (if any) */ 3507 if (nis) { 3508 PetscMPIInt psum; 3509 PetscInt j; 3510 for (j=0,psum=0;j<nis;j++) { 3511 PetscInt plen; 3512 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3513 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3514 psum += len+1; /* indices + lenght */ 3515 } 3516 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3517 for (j=0,psum=0;j<nis;j++) { 3518 PetscInt plen; 3519 const PetscInt *is_array_idxs; 3520 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3521 send_buffer_idxs_is[psum] = plen; 3522 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3523 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3524 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3525 psum += plen+1; /* indices + lenght */ 3526 } 3527 for (i=0;i<n_sends;i++) { 3528 ilengths_idxs_is[is_indices[i]] = psum; 3529 } 3530 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3531 } 3532 3533 buf_size_idxs = 0; 3534 buf_size_vals = 0; 3535 buf_size_idxs_is = 0; 3536 for (i=0;i<n_recvs;i++) { 3537 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3538 buf_size_vals += (PetscInt)olengths_vals[i]; 3539 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3540 } 3541 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3542 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3543 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3544 3545 /* get new tags for clean communications */ 3546 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3547 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3548 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3549 3550 /* allocate for requests */ 3551 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3552 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3553 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3554 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3555 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3556 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3557 3558 /* communications */ 3559 ptr_idxs = recv_buffer_idxs; 3560 ptr_vals = recv_buffer_vals; 3561 ptr_idxs_is = recv_buffer_idxs_is; 3562 for (i=0;i<n_recvs;i++) { 3563 source_dest = onodes[i]; 3564 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3565 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3566 ptr_idxs += olengths_idxs[i]; 3567 ptr_vals += olengths_vals[i]; 3568 if (nis) { 3569 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3570 ptr_idxs_is += olengths_idxs_is[i]; 3571 } 3572 } 3573 for (i=0;i<n_sends;i++) { 3574 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3575 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3576 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3577 if (nis) { 3578 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3579 } 3580 } 3581 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3582 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3583 3584 /* assemble new l2g map */ 3585 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3586 ptr_idxs = recv_buffer_idxs; 3587 new_local_rows = 0; 3588 for (i=0;i<n_recvs;i++) { 3589 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3590 ptr_idxs += olengths_idxs[i]; 3591 } 3592 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3593 ptr_idxs = recv_buffer_idxs; 3594 new_local_rows = 0; 3595 for (i=0;i<n_recvs;i++) { 3596 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3597 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3598 ptr_idxs += olengths_idxs[i]; 3599 } 3600 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3601 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3602 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3603 3604 /* infer new local matrix type from received local matrices type */ 3605 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3606 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3607 if (n_recvs) { 3608 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3609 ptr_idxs = recv_buffer_idxs; 3610 for (i=0;i<n_recvs;i++) { 3611 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3612 new_local_type_private = MATAIJ_PRIVATE; 3613 break; 3614 } 3615 ptr_idxs += olengths_idxs[i]; 3616 } 3617 switch (new_local_type_private) { 3618 case MATDENSE_PRIVATE: 3619 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3620 new_local_type = MATSEQAIJ; 3621 bs = 1; 3622 } else { /* if I receive only 1 dense matrix */ 3623 new_local_type = MATSEQDENSE; 3624 bs = 1; 3625 } 3626 break; 3627 case MATAIJ_PRIVATE: 3628 new_local_type = MATSEQAIJ; 3629 bs = 1; 3630 break; 3631 case MATBAIJ_PRIVATE: 3632 new_local_type = MATSEQBAIJ; 3633 break; 3634 case MATSBAIJ_PRIVATE: 3635 new_local_type = MATSEQSBAIJ; 3636 break; 3637 default: 3638 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3639 break; 3640 } 3641 } else { /* by default, new_local_type is seqdense */ 3642 new_local_type = MATSEQDENSE; 3643 bs = 1; 3644 } 3645 3646 /* create MATIS object if needed */ 3647 if (reuse == MAT_INITIAL_MATRIX) { 3648 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3649 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3650 } else { 3651 /* it also destroys the local matrices */ 3652 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3653 } 3654 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3655 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3656 3657 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3658 3659 /* Global to local map of received indices */ 3660 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3661 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3662 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3663 3664 /* restore attributes -> type of incoming data and its size */ 3665 buf_size_idxs = 0; 3666 for (i=0;i<n_recvs;i++) { 3667 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3668 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3669 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3670 } 3671 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3672 3673 /* set preallocation */ 3674 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3675 if (!newisdense) { 3676 PetscInt *new_local_nnz=0; 3677 3678 ptr_vals = recv_buffer_vals; 3679 ptr_idxs = recv_buffer_idxs_local; 3680 if (n_recvs) { 3681 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3682 } 3683 for (i=0;i<n_recvs;i++) { 3684 PetscInt j; 3685 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3686 for (j=0;j<*(ptr_idxs+1);j++) { 3687 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3688 } 3689 } else { 3690 /* TODO */ 3691 } 3692 ptr_idxs += olengths_idxs[i]; 3693 } 3694 if (new_local_nnz) { 3695 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3696 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3697 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3698 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3699 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3700 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3701 } else { 3702 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3703 } 3704 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3705 } else { 3706 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3707 } 3708 3709 /* set values */ 3710 ptr_vals = recv_buffer_vals; 3711 ptr_idxs = recv_buffer_idxs_local; 3712 for (i=0;i<n_recvs;i++) { 3713 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3714 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3715 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3716 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3717 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3718 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3719 } else { 3720 /* TODO */ 3721 } 3722 ptr_idxs += olengths_idxs[i]; 3723 ptr_vals += olengths_vals[i]; 3724 } 3725 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3726 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3727 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3728 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3729 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3730 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3731 3732 #if 0 3733 if (!restrict_comm) { /* check */ 3734 Vec lvec,rvec; 3735 PetscReal infty_error; 3736 3737 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3738 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3739 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3740 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3741 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3742 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3743 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3744 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3745 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3746 } 3747 #endif 3748 3749 /* assemble new additional is (if any) */ 3750 if (nis) { 3751 PetscInt **temp_idxs,*count_is,j,psum; 3752 3753 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3754 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3755 ptr_idxs = recv_buffer_idxs_is; 3756 psum = 0; 3757 for (i=0;i<n_recvs;i++) { 3758 for (j=0;j<nis;j++) { 3759 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3760 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3761 psum += plen; 3762 ptr_idxs += plen+1; /* shift pointer to received data */ 3763 } 3764 } 3765 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3766 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3767 for (i=1;i<nis;i++) { 3768 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3769 } 3770 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3771 ptr_idxs = recv_buffer_idxs_is; 3772 for (i=0;i<n_recvs;i++) { 3773 for (j=0;j<nis;j++) { 3774 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3775 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3776 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3777 ptr_idxs += plen+1; /* shift pointer to received data */ 3778 } 3779 } 3780 for (i=0;i<nis;i++) { 3781 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3782 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3783 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3784 } 3785 ierr = PetscFree(count_is);CHKERRQ(ierr); 3786 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3787 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3788 } 3789 /* free workspace */ 3790 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3791 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3792 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3793 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3794 if (isdense) { 3795 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3796 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3797 } else { 3798 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3799 } 3800 if (nis) { 3801 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3802 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3803 } 3804 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3805 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3806 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3807 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3808 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3809 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3810 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3811 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3812 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3813 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3814 ierr = PetscFree(onodes);CHKERRQ(ierr); 3815 if (nis) { 3816 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3817 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3818 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3819 } 3820 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3821 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3822 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3823 for (i=0;i<nis;i++) { 3824 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3825 } 3826 } 3827 PetscFunctionReturn(0); 3828 } 3829 3830 /* temporary hack into ksp private data structure */ 3831 #include <petsc-private/kspimpl.h> 3832 3833 #undef __FUNCT__ 3834 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3835 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3836 { 3837 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3838 PC_IS *pcis = (PC_IS*)pc->data; 3839 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3840 MatNullSpace CoarseNullSpace=NULL; 3841 ISLocalToGlobalMapping coarse_islg; 3842 IS coarse_is,*isarray; 3843 PetscInt i,im_active=-1,active_procs=-1; 3844 PetscInt nis,nisdofs,nisneu; 3845 PC pc_temp; 3846 PCType coarse_pc_type; 3847 KSPType coarse_ksp_type; 3848 PetscBool multilevel_requested,multilevel_allowed; 3849 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3850 Mat t_coarse_mat_is; 3851 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3852 PetscMPIInt all_procs; 3853 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3854 PetscBool compute_vecs = PETSC_FALSE; 3855 PetscScalar *array; 3856 PetscErrorCode ierr; 3857 3858 PetscFunctionBegin; 3859 /* Assign global numbering to coarse dofs */ 3860 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3861 compute_vecs = PETSC_TRUE; 3862 PetscInt ocoarse_size; 3863 ocoarse_size = pcbddc->coarse_size; 3864 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3865 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3866 /* see if we can avoid some work */ 3867 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3868 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3869 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3870 coarse_reuse = PETSC_FALSE; 3871 } else { /* we can safely reuse already computed coarse matrix */ 3872 coarse_reuse = PETSC_TRUE; 3873 } 3874 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3875 coarse_reuse = PETSC_FALSE; 3876 } 3877 /* reset any subassembling information */ 3878 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3879 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3880 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3881 coarse_reuse = PETSC_TRUE; 3882 } 3883 3884 /* count "active" (i.e. with positive local size) and "void" processes */ 3885 im_active = !!(pcis->n); 3886 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3887 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3888 void_procs = all_procs-active_procs; 3889 csin_type_simple = PETSC_TRUE; 3890 redist = PETSC_FALSE; 3891 if (pcbddc->current_level && void_procs) { 3892 csin_ml = PETSC_TRUE; 3893 ncoarse_ml = void_procs; 3894 csin_ds = PETSC_TRUE; 3895 ncoarse_ds = void_procs; 3896 } else { 3897 csin_ml = PETSC_FALSE; 3898 ncoarse_ml = all_procs; 3899 if (void_procs) { 3900 csin_ds = PETSC_TRUE; 3901 ncoarse_ds = void_procs; 3902 csin_type_simple = PETSC_FALSE; 3903 } else { 3904 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3905 csin_ds = PETSC_TRUE; 3906 ncoarse_ds = pcbddc->redistribute_coarse; 3907 redist = PETSC_TRUE; 3908 } else { 3909 csin_ds = PETSC_FALSE; 3910 ncoarse_ds = all_procs; 3911 } 3912 } 3913 } 3914 3915 /* 3916 test if we can go multilevel: three conditions must be satisfied: 3917 - we have not exceeded the number of levels requested 3918 - we can actually subassemble the active processes 3919 - we can find a suitable number of MPI processes where we can place the subassembled problem 3920 */ 3921 multilevel_allowed = PETSC_FALSE; 3922 multilevel_requested = PETSC_FALSE; 3923 if (pcbddc->current_level < pcbddc->max_levels) { 3924 multilevel_requested = PETSC_TRUE; 3925 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3926 multilevel_allowed = PETSC_FALSE; 3927 } else { 3928 multilevel_allowed = PETSC_TRUE; 3929 } 3930 } 3931 /* determine number of process partecipating to coarse solver */ 3932 if (multilevel_allowed) { 3933 ncoarse = ncoarse_ml; 3934 csin = csin_ml; 3935 } else { 3936 ncoarse = ncoarse_ds; 3937 csin = csin_ds; 3938 } 3939 3940 /* creates temporary l2gmap and IS for coarse indexes */ 3941 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3942 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3943 3944 /* creates temporary MATIS object for coarse matrix */ 3945 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3946 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3947 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3948 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3949 #if 0 3950 { 3951 PetscViewer viewer; 3952 char filename[256]; 3953 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3954 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3955 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3956 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3957 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3958 } 3959 #endif 3960 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3961 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3962 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3963 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3964 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3965 3966 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3967 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3968 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3969 const PetscInt *idxs; 3970 ISLocalToGlobalMapping tmap; 3971 3972 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3973 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3974 /* allocate space for temporary storage */ 3975 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3976 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3977 /* allocate for IS array */ 3978 nisdofs = pcbddc->n_ISForDofsLocal; 3979 nisneu = !!pcbddc->NeumannBoundariesLocal; 3980 nis = nisdofs + nisneu; 3981 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3982 /* dofs splitting */ 3983 for (i=0;i<nisdofs;i++) { 3984 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3985 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3986 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3987 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3988 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3989 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3990 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3991 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3992 } 3993 /* neumann boundaries */ 3994 if (pcbddc->NeumannBoundariesLocal) { 3995 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3996 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3997 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3998 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3999 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4000 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4001 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4002 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4003 } 4004 /* free memory */ 4005 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4006 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4007 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4008 } else { 4009 nis = 0; 4010 nisdofs = 0; 4011 nisneu = 0; 4012 isarray = NULL; 4013 } 4014 /* destroy no longer needed map */ 4015 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4016 4017 /* restrict on coarse candidates (if needed) */ 4018 coarse_mat_is = NULL; 4019 if (csin) { 4020 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4021 if (redist) { 4022 PetscMPIInt rank; 4023 PetscInt spc,n_spc_p1,dest[1]; 4024 4025 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4026 spc = all_procs/pcbddc->redistribute_coarse; 4027 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 4028 if (rank > n_spc_p1*(spc+1)-1) { 4029 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4030 } else { 4031 dest[0] = rank/(spc+1); 4032 } 4033 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4034 } else { 4035 PetscInt j,tissize,*nisindices; 4036 PetscInt *coarse_candidates; 4037 const PetscInt* tisindices; 4038 /* get coarse candidates' ranks in pc communicator */ 4039 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4040 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4041 for (i=0,j=0;i<all_procs;i++) { 4042 if (!coarse_candidates[i]) { 4043 coarse_candidates[j]=i; 4044 j++; 4045 } 4046 } 4047 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 4048 /* get a suitable subassembling pattern */ 4049 if (csin_type_simple) { 4050 PetscMPIInt rank; 4051 PetscInt issize,isidx; 4052 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4053 if (im_active) { 4054 issize = 1; 4055 isidx = (PetscInt)rank; 4056 } else { 4057 issize = 0; 4058 isidx = -1; 4059 } 4060 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4061 } else { 4062 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4063 } 4064 if (pcbddc->dbg_flag) { 4065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4067 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4069 for (i=0;i<j;i++) { 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4071 } 4072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4073 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4074 } 4075 /* shift the pattern on coarse candidates */ 4076 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4077 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4078 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4079 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4080 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4081 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4082 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4083 } 4084 } 4085 if (pcbddc->dbg_flag) { 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4088 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4089 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4090 } 4091 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4092 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4093 } else { 4094 if (pcbddc->dbg_flag) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 } 4099 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4100 coarse_mat_is = t_coarse_mat_is; 4101 } 4102 4103 /* create local to global scatters for coarse problem */ 4104 if (compute_vecs) { 4105 PetscInt lrows; 4106 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4107 if (coarse_mat_is) { 4108 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4109 } else { 4110 lrows = 0; 4111 } 4112 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4113 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4114 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4115 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4116 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4117 } 4118 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4119 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4120 4121 /* set defaults for coarse KSP and PC */ 4122 if (multilevel_allowed) { 4123 coarse_ksp_type = KSPRICHARDSON; 4124 coarse_pc_type = PCBDDC; 4125 } else { 4126 coarse_ksp_type = KSPPREONLY; 4127 coarse_pc_type = PCREDUNDANT; 4128 } 4129 4130 /* print some info if requested */ 4131 if (pcbddc->dbg_flag) { 4132 if (!multilevel_allowed) { 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4134 if (multilevel_requested) { 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4136 } else if (pcbddc->max_levels) { 4137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4138 } 4139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4140 } 4141 } 4142 4143 /* create the coarse KSP object only once with defaults */ 4144 if (coarse_mat_is) { 4145 MatReuse coarse_mat_reuse; 4146 PetscViewer dbg_viewer = NULL; 4147 if (pcbddc->dbg_flag) { 4148 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4149 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4150 } 4151 if (!pcbddc->coarse_ksp) { 4152 char prefix[256],str_level[16]; 4153 size_t len; 4154 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4155 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4156 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4157 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4158 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4159 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4160 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4161 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4162 /* prefix */ 4163 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4164 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4165 if (!pcbddc->current_level) { 4166 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4167 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4168 } else { 4169 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4170 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4171 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4172 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4173 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4174 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4175 } 4176 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4177 /* allow user customization */ 4178 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4179 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4180 } 4181 4182 /* get some info after set from options */ 4183 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4184 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4185 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4186 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4187 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4188 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4189 isbddc = PETSC_FALSE; 4190 } 4191 if (isredundant) { 4192 KSP inner_ksp; 4193 PC inner_pc; 4194 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4195 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4196 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4197 } 4198 4199 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4200 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4201 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4202 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4203 if (nisdofs) { 4204 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4205 for (i=0;i<nisdofs;i++) { 4206 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4207 } 4208 } 4209 if (nisneu) { 4210 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4211 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4212 } 4213 4214 /* assemble coarse matrix */ 4215 if (coarse_reuse) { 4216 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4217 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4218 coarse_mat_reuse = MAT_REUSE_MATRIX; 4219 } else { 4220 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4221 } 4222 if (isbddc || isnn) { 4223 if (pcbddc->coarsening_ratio > 1) { 4224 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4225 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4226 if (pcbddc->dbg_flag) { 4227 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4228 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4229 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4230 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4231 } 4232 } 4233 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4234 } else { 4235 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4236 coarse_mat = coarse_mat_is; 4237 } 4238 } else { 4239 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4240 } 4241 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4242 4243 /* propagate symmetry info to coarse matrix */ 4244 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4245 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4246 4247 /* set operators */ 4248 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4251 } 4252 } else { /* processes non partecipating to coarse solver (if any) */ 4253 coarse_mat = 0; 4254 } 4255 ierr = PetscFree(isarray);CHKERRQ(ierr); 4256 #if 0 4257 { 4258 PetscViewer viewer; 4259 char filename[256]; 4260 sprintf(filename,"coarse_mat.m"); 4261 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4262 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4263 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4264 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4265 } 4266 #endif 4267 4268 /* Compute coarse null space (special handling by BDDC only) */ 4269 if (pcbddc->NullSpace) { 4270 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4271 } 4272 4273 if (pcbddc->coarse_ksp) { 4274 Vec crhs,csol; 4275 PetscBool ispreonly; 4276 if (CoarseNullSpace) { 4277 if (isbddc) { 4278 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4279 } else { 4280 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4281 } 4282 } 4283 /* setup coarse ksp */ 4284 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4285 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4286 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4287 /* hack */ 4288 if (!csol) { 4289 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4290 } 4291 if (!crhs) { 4292 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4293 } 4294 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4295 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4296 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4297 KSP check_ksp; 4298 KSPType check_ksp_type; 4299 PC check_pc; 4300 Vec check_vec,coarse_vec; 4301 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4302 PetscInt its; 4303 PetscBool compute_eigs; 4304 PetscReal *eigs_r,*eigs_c; 4305 PetscInt neigs; 4306 const char *prefix; 4307 4308 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4309 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4310 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4311 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4312 if (ispreonly) { 4313 check_ksp_type = KSPPREONLY; 4314 compute_eigs = PETSC_FALSE; 4315 } else { 4316 check_ksp_type = KSPGMRES; 4317 compute_eigs = PETSC_TRUE; 4318 } 4319 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4320 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4321 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4322 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4323 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4324 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4325 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4326 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4327 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4328 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4329 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4330 /* create random vec */ 4331 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4332 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4333 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4334 if (CoarseNullSpace) { 4335 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4336 } 4337 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4338 /* solve coarse problem */ 4339 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4340 if (CoarseNullSpace) { 4341 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4342 } 4343 /* set eigenvalue estimation if preonly has not been requested */ 4344 if (compute_eigs) { 4345 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4346 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4347 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4348 lambda_max = eigs_r[neigs-1]; 4349 lambda_min = eigs_r[0]; 4350 if (pcbddc->use_coarse_estimates) { 4351 if (lambda_max>lambda_min) { 4352 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4353 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4354 } 4355 } 4356 } 4357 4358 /* check coarse problem residual error */ 4359 if (pcbddc->dbg_flag) { 4360 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4361 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4362 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4363 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4364 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4365 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4366 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4368 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4369 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4372 if (compute_eigs) { 4373 PetscReal lambda_max_s,lambda_min_s; 4374 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4375 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4376 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4377 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4378 for (i=0;i<neigs;i++) { 4379 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4380 } 4381 } 4382 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4383 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4384 } 4385 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4386 if (compute_eigs) { 4387 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4388 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4389 } 4390 } 4391 } 4392 /* print additional info */ 4393 if (pcbddc->dbg_flag) { 4394 /* waits until all processes reaches this point */ 4395 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4397 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4398 } 4399 4400 /* free memory */ 4401 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4402 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4409 { 4410 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4411 PC_IS* pcis = (PC_IS*)pc->data; 4412 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4413 PetscInt i,coarse_size; 4414 PetscInt *local_primal_indices; 4415 PetscErrorCode ierr; 4416 4417 PetscFunctionBegin; 4418 /* Compute global number of coarse dofs */ 4419 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4420 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4421 } 4422 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4423 4424 /* check numbering */ 4425 if (pcbddc->dbg_flag) { 4426 PetscScalar coarsesum,*array; 4427 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4428 4429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4430 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4431 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4432 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4433 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4434 for (i=0;i<pcbddc->local_primal_size;i++) { 4435 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4436 } 4437 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4438 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4439 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4440 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4441 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4442 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4443 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4444 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4445 for (i=0;i<pcis->n;i++) { 4446 if (array[i] == 1.0) { 4447 set_error = PETSC_TRUE; 4448 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4449 } 4450 } 4451 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4452 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4453 for (i=0;i<pcis->n;i++) { 4454 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4455 } 4456 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4457 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4458 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4459 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4460 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4462 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4464 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4466 for (i=0;i<pcbddc->local_primal_size;i++) { 4467 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4468 } 4469 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4470 } 4471 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4472 if (set_error_reduced) { 4473 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4474 } 4475 } 4476 /* get back data */ 4477 *coarse_size_n = coarse_size; 4478 *local_primal_indices_n = local_primal_indices; 4479 PetscFunctionReturn(0); 4480 } 4481 4482 #undef __FUNCT__ 4483 #define __FUNCT__ "PCBDDCGlobalToLocal" 4484 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4485 { 4486 IS localis_t; 4487 PetscInt i,lsize,*idxs,n; 4488 PetscScalar *vals; 4489 PetscErrorCode ierr; 4490 4491 PetscFunctionBegin; 4492 /* get indices in local ordering exploiting local to global map */ 4493 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4494 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4495 for (i=0;i<lsize;i++) vals[i] = 1.0; 4496 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4497 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4498 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4499 if (idxs) { /* multilevel guard */ 4500 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4501 } 4502 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4503 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4504 ierr = PetscFree(vals);CHKERRQ(ierr); 4505 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4506 /* now compute set in local ordering */ 4507 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4508 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4509 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4510 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4511 for (i=0,lsize=0;i<n;i++) { 4512 if (PetscRealPart(vals[i]) > 0.5) { 4513 lsize++; 4514 } 4515 } 4516 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4517 for (i=0,lsize=0;i<n;i++) { 4518 if (PetscRealPart(vals[i]) > 0.5) { 4519 idxs[lsize++] = i; 4520 } 4521 } 4522 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4524 *localis = localis_t; 4525 PetscFunctionReturn(0); 4526 } 4527 4528 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4529 #undef __FUNCT__ 4530 #define __FUNCT__ "PCBDDCMatMult_Private" 4531 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4532 { 4533 PCBDDCChange_ctx change_ctx; 4534 PetscErrorCode ierr; 4535 4536 PetscFunctionBegin; 4537 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4538 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4539 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4540 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4541 PetscFunctionReturn(0); 4542 } 4543 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4546 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4547 { 4548 PCBDDCChange_ctx change_ctx; 4549 PetscErrorCode ierr; 4550 4551 PetscFunctionBegin; 4552 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4553 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4554 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4555 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4556 PetscFunctionReturn(0); 4557 } 4558 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4561 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4562 { 4563 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4564 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4565 PetscInt *used_xadj,*used_adjncy; 4566 PetscBool free_used_adj; 4567 PetscErrorCode ierr; 4568 4569 PetscFunctionBegin; 4570 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4571 free_used_adj = PETSC_FALSE; 4572 if (pcbddc->sub_schurs_layers == -1) { 4573 used_xadj = NULL; 4574 used_adjncy = NULL; 4575 } else { 4576 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4577 used_xadj = pcbddc->mat_graph->xadj; 4578 used_adjncy = pcbddc->mat_graph->adjncy; 4579 } else if (pcbddc->computed_rowadj) { 4580 used_xadj = pcbddc->mat_graph->xadj; 4581 used_adjncy = pcbddc->mat_graph->adjncy; 4582 } else { 4583 Mat mat_adj; 4584 PetscBool flg_row=PETSC_TRUE; 4585 const PetscInt *xadj,*adjncy; 4586 PetscInt nvtxs; 4587 4588 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4589 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4590 if (!flg_row) { 4591 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4592 } 4593 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4594 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4595 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4596 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4597 if (!flg_row) { 4598 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4599 } 4600 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4601 free_used_adj = PETSC_TRUE; 4602 } 4603 } 4604 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4605 4606 /* free adjacency */ 4607 if (free_used_adj) { 4608 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4609 } 4610 PetscFunctionReturn(0); 4611 } 4612 4613 #undef __FUNCT__ 4614 #define __FUNCT__ "PCBDDCInitSubSchurs" 4615 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4616 { 4617 PC_IS *pcis=(PC_IS*)pc->data; 4618 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4619 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4620 PCBDDCGraph graph; 4621 Mat S_j; 4622 PetscErrorCode ierr; 4623 4624 PetscFunctionBegin; 4625 /* attach interface graph for determining subsets */ 4626 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4627 IS verticesIS; 4628 4629 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4630 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4631 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4632 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4633 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4634 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4635 /* 4636 if (pcbddc->dbg_flag) { 4637 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4638 } 4639 */ 4640 } else { 4641 graph = pcbddc->mat_graph; 4642 } 4643 4644 /* Create Schur complement matrix */ 4645 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4646 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4647 4648 /* sub_schurs init */ 4649 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4650 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4651 /* free graph struct */ 4652 if (pcbddc->sub_schurs_rebuild) { 4653 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4654 } 4655 PetscFunctionReturn(0); 4656 } 4657