1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 cum += subset_size*n; 66 cum2 += n; 67 maxneigs = PetscMax(maxneigs,n); 68 } 69 } 70 71 if (mss) { 72 if (symmetric) { 73 PetscBLASInt B_itype = 1; 74 PetscBLASInt B_N = mss; 75 PetscScalar zero = 0.0; 76 PetscScalar eps = 0.0; /* dlamch? */ 77 78 B_lwork = -1; 79 S = NULL; 80 St = NULL; 81 eigs = NULL; 82 eigv = NULL; 83 B_iwork = NULL; 84 B_ifail = NULL; 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 #if defined(PETSC_USE_COMPLEX) 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 88 #else 89 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 90 #endif 91 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 } else { 94 /* TODO */ 95 } 96 } else { 97 lwork = 0; 98 } 99 100 nv = 0; 101 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 102 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 103 } 104 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 105 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 106 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 107 #if defined(PETSC_USE_COMPLEX) 108 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 109 #endif 110 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 111 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 112 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 113 nv+cum,&pcbddc->adaptive_constraints_idxs, 114 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 115 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 116 117 maxneigs = 0; 118 cum = cum2 = cumarray = 0; 119 if (sub_schurs->is_Ej_com) { 120 const PetscInt *idxs; 121 122 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 123 for (cum=0;cum<nv;cum++) { 124 pcbddc->adaptive_constraints_n[cum] = 1; 125 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 126 pcbddc->adaptive_constraints_ptrs[cum] = cum; 127 pcbddc->adaptive_constraints_data[cum] = 1.0; 128 } 129 cum2 = cum; 130 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 131 } 132 133 if (mss) { /* multilevel */ 134 if (pcbddc->use_deluxe_scaling) { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 136 } else { 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 138 } 139 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 140 } 141 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt j,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 147 const PetscInt *idxs; 148 PetscScalar one = 1.0,zero=0.0; 149 PetscBLASInt B_N; 150 151 /* S should be copied since we need it for deluxe scaling */ 152 if (symmetric) { 153 PetscInt j; 154 for (j=0;j<subset_size;j++) { 155 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 156 } 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 } else { 161 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 } 164 /* is always this the right matrix? */ 165 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 167 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 168 /* St = Starray+cumarray; */ 169 170 if (symmetric) { 171 PetscBLASInt B_itype = 1; 172 PetscBLASInt B_IL = 1, B_IU; 173 PetscScalar eps = -1.0; /* dlamch? */ 174 PetscInt nmin_s; 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 nmin_s = PetscMin(nmin,B_N); 202 if (B_neigs < nmin_s) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 251 252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 253 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 #if 0 256 PetscInt ii; 257 for (ii=0;ii<B_neigs;ii++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 259 for (j=0;j<B_N;j++) { 260 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 261 } 262 } 263 #endif 264 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 265 for (j=0;j<B_neigs;j++) { 266 #if 0 267 { 268 PetscBLASInt Blas_N,Blas_one = 1.0; 269 PetscScalar norm; 270 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 272 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 273 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } else { 275 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 276 } 277 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 278 } 279 #endif 280 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 281 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 282 cum2 += subset_size; 283 } 284 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 } 286 /* shift for next computation */ 287 cumarray += subset_size*subset_size; 288 } 289 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 290 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 291 292 if (mss) { 293 if (pcbddc->use_deluxe_scaling) { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 295 } else { 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 297 } 298 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 299 } 300 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 301 #if defined(PETSC_USE_COMPLEX) 302 ierr = PetscFree(rwork);CHKERRQ(ierr); 303 #endif 304 if (pcbddc->dbg_flag) { 305 PetscInt maxneigs_r; 306 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 307 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "PCBDDCSetUpSolvers" 314 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 315 { 316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 317 PetscScalar *coarse_submat_vals; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 /* Setup local scatters R_to_B and (optionally) R_to_D */ 322 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 323 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 324 325 /* Setup local neumann solver ksp_R */ 326 /* PCBDDCSetUpLocalScatters should be called first! */ 327 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 328 329 /* Change global null space passed in by the user if change of basis has been requested */ 330 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 331 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 332 } 333 334 /* 335 Setup local correction and local part of coarse basis. 336 Gives back the dense local part of the coarse matrix in column major ordering 337 */ 338 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 339 340 /* Compute total number of coarse nodes and setup coarse solver */ 341 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 342 343 /* free */ 344 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "PCBDDCResetCustomization" 350 PetscErrorCode PCBDDCResetCustomization(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 358 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 361 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 362 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 363 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 364 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetTopography" 372 PetscErrorCode PCBDDCResetTopography(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 380 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 381 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCBDDCResetSolvers" 388 PetscErrorCode PCBDDCResetSolvers(PC pc) 389 { 390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 391 PetscScalar *array; 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 396 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 /* Get submatrices from subdomain matrix */ 652 if (n_vertices) { 653 Mat newmat; 654 IS is_aux; 655 PetscInt ibs,mbs; 656 PetscBool issbaij; 657 658 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 659 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 660 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 661 if (ibs != mbs) { /* need to convert to SEQAIJ */ 662 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 /* this is safe */ 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 671 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 672 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 673 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 674 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 676 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 740 if (n_R) { 741 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 742 PetscBLASInt B_N,B_one = 1; 743 PetscScalar *x,*y; 744 745 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 747 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 748 if (F) { 749 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 750 } else { 751 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 752 for (i=0;i<n_vertices;i++) { 753 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 754 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 760 } 761 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 762 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 763 if (n_constraints) { 764 Mat B; 765 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 767 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 768 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 769 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 770 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 771 ierr = MatDestroy(&B);CHKERRQ(ierr); 772 } 773 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 774 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 775 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 776 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 778 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 779 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 780 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 781 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 782 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 783 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 784 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 785 } else { 786 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 787 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 788 } 789 /* coarse basis functions */ 790 for (i=0;i<n_vertices;i++) { 791 PetscScalar *y; 792 793 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 794 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 795 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 y[n_B*i+idx_V_B[i]] = 1.0; 799 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 800 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 801 802 if (pcbddc->switch_static || pcbddc->dbg_flag) { 803 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 804 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 805 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 809 } 810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 811 } 812 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 813 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 814 } 815 816 if (n_constraints) { 817 Mat B; 818 819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 822 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 823 if (n_vertices) { 824 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&B);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_constraints;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 837 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 838 839 if (pcbddc->switch_static || pcbddc->dbg_flag) { 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 846 } 847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 848 } 849 } 850 851 /* compute other basis functions for non-symmetric problems */ 852 if (!pcbddc->issym) { 853 Mat B,X; 854 855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 856 857 if (n_constraints) { 858 Mat S_CCT,B_C; 859 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 861 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 862 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 863 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 864 if (n_vertices) { 865 Mat B_V,S_VCT; 866 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 868 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 869 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 871 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 872 } 873 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 874 } 875 if (n_vertices && n_R) { 876 Mat A_VRT; 877 PetscBLASInt B_N,B_one = 1; 878 879 if (!n_constraints) { /* if there are no constraints, reset work */ 880 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 883 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 884 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 885 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 886 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 887 } 888 889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 890 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 891 for (i=0;i<pcbddc->local_primal_size;i++) { 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 894 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 896 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 897 } 898 } else { 899 for (i=0;i<pcbddc->local_primal_size;i++) { 900 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 902 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 904 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 905 } 906 } 907 ierr = MatDestroy(&B);CHKERRQ(ierr); 908 /* coarse basis functions */ 909 for (i=0;i<pcbddc->local_primal_size;i++) { 910 PetscScalar *y; 911 912 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 if (i<n_vertices) { 918 y[n_B*i+idx_V_B[i]] = 1.0; 919 } 920 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 921 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 922 923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 924 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 926 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 929 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 930 } 931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 932 } 933 ierr = MatDestroy(&X);CHKERRQ(ierr); 934 } 935 936 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 940 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 941 /* Checking coarse_sub_mat and coarse basis functios */ 942 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 944 if (pcbddc->dbg_flag) { 945 Mat coarse_sub_mat; 946 Mat AUXMAT,TM1,TM2,TM3,TM4; 947 Mat coarse_phi_D,coarse_phi_B; 948 Mat coarse_psi_D,coarse_psi_B; 949 Mat A_II,A_BB,A_IB,A_BI; 950 MatType checkmattype=MATSEQAIJ; 951 PetscReal real_value; 952 953 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 956 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 958 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 959 if (unsymmetric_check) { 960 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 961 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 962 } 963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 964 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 if (unsymmetric_check) { 969 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 970 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 971 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 972 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 973 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 974 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 975 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 976 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 977 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 978 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 979 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 980 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 981 } else { 982 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 983 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 984 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 985 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 986 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 987 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 988 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 989 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 990 } 991 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 994 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 995 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 996 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 997 998 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1004 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1010 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1011 if (unsymmetric_check) { 1012 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1013 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1014 } 1015 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1016 } 1017 1018 /* free memory */ 1019 ierr = PetscFree(work);CHKERRQ(ierr); 1020 if (n_vertices) { 1021 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1022 } 1023 if (n_constraints) { 1024 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1025 } 1026 /* get back data */ 1027 *coarse_submat_vals_n = coarse_submat_vals; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1034 { 1035 Mat *work_mat; 1036 IS isrow_s,iscol_s; 1037 PetscBool rsorted,csorted; 1038 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1039 PetscErrorCode ierr; 1040 1041 PetscFunctionBegin; 1042 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1043 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1046 1047 if (!rsorted) { 1048 const PetscInt *idxs; 1049 PetscInt *idxs_sorted,i; 1050 1051 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1052 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1053 for (i=0;i<rsize;i++) { 1054 idxs_perm_r[i] = i; 1055 } 1056 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1057 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1058 for (i=0;i<rsize;i++) { 1059 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1060 } 1061 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1063 } else { 1064 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1065 isrow_s = isrow; 1066 } 1067 1068 if (!csorted) { 1069 if (isrow == iscol) { 1070 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1071 iscol_s = isrow_s; 1072 } else { 1073 const PetscInt *idxs; 1074 PetscInt *idxs_sorted,i; 1075 1076 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1077 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1078 for (i=0;i<csize;i++) { 1079 idxs_perm_c[i] = i; 1080 } 1081 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1082 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1083 for (i=0;i<csize;i++) { 1084 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1085 } 1086 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1087 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1088 } 1089 } else { 1090 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1091 iscol_s = iscol; 1092 } 1093 1094 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1095 1096 if (!rsorted || !csorted) { 1097 Mat new_mat; 1098 IS is_perm_r,is_perm_c; 1099 1100 if (!rsorted) { 1101 PetscInt *idxs_r,i; 1102 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1103 for (i=0;i<rsize;i++) { 1104 idxs_r[idxs_perm_r[i]] = i; 1105 } 1106 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1108 } else { 1109 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1110 } 1111 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1112 1113 if (!csorted) { 1114 if (isrow_s == iscol_s) { 1115 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1116 is_perm_c = is_perm_r; 1117 } else { 1118 PetscInt *idxs_c,i; 1119 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1120 for (i=0;i<csize;i++) { 1121 idxs_c[idxs_perm_c[i]] = i; 1122 } 1123 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1125 } 1126 } else { 1127 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1128 } 1129 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1130 1131 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1132 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1133 work_mat[0] = new_mat; 1134 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1135 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1136 } 1137 1138 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1139 *B = work_mat[0]; 1140 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1142 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1149 { 1150 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 Mat new_mat; 1153 IS is_local,is_global; 1154 PetscInt local_size; 1155 PetscBool isseqaij; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1160 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1161 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1162 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1163 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1166 1167 /* check */ 1168 if (pcbddc->dbg_flag) { 1169 Vec x,x_change; 1170 PetscReal error; 1171 1172 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1173 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1174 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1178 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1180 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1181 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x);CHKERRQ(ierr); 1185 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1186 } 1187 1188 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1189 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1190 if (isseqaij) { 1191 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1192 } else { 1193 Mat work_mat; 1194 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1195 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1197 } 1198 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1199 /* 1200 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1201 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1202 */ 1203 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1210 { 1211 PC_IS* pcis = (PC_IS*)(pc->data); 1212 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1213 IS is_aux1,is_aux2; 1214 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1215 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1216 PetscInt vbs,bs; 1217 PetscBT bitmask; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 /* 1222 No need to setup local scatters if 1223 - primal space is unchanged 1224 AND 1225 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1226 AND 1227 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1228 */ 1229 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1230 PetscFunctionReturn(0); 1231 } 1232 /* destroy old objects */ 1233 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1235 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1236 /* Set Non-overlapping dimensions */ 1237 n_B = pcis->n_B; n_D = pcis->n - n_B; 1238 n_vertices = pcbddc->n_actual_vertices; 1239 /* create auxiliary bitmask */ 1240 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1241 for (i=0;i<n_vertices;i++) { 1242 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1243 } 1244 1245 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1246 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1247 for (i=0, n_R=0; i<pcis->n; i++) { 1248 if (!PetscBTLookup(bitmask,i)) { 1249 idx_R_local[n_R] = i; 1250 n_R++; 1251 } 1252 } 1253 1254 /* Block code */ 1255 vbs = 1; 1256 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1257 if (bs>1 && !(n_vertices%bs)) { 1258 PetscBool is_blocked = PETSC_TRUE; 1259 PetscInt *vary; 1260 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1261 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1262 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1263 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1264 for (i=0; i<n_vertices; i++) { 1265 if (vary[i]!=0 && vary[i]!=bs) { 1266 is_blocked = PETSC_FALSE; 1267 break; 1268 } 1269 } 1270 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1271 vbs = bs; 1272 for (i=0;i<n_R/vbs;i++) { 1273 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1274 } 1275 } 1276 ierr = PetscFree(vary);CHKERRQ(ierr); 1277 } 1278 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1279 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1280 1281 /* print some info if requested */ 1282 if (pcbddc->dbg_flag) { 1283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1291 } 1292 1293 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1294 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1296 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0; i<n_D; i++) { 1299 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1300 } 1301 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_R; i++) { 1303 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1304 aux_array1[j++] = i; 1305 } 1306 } 1307 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1308 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1309 for (i=0, j=0; i<n_B; i++) { 1310 if (!PetscBTLookup(bitmask,is_indices[i])) { 1311 aux_array2[j++] = i; 1312 } 1313 } 1314 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1316 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1319 1320 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1321 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1322 for (i=0, j=0; i<n_R; i++) { 1323 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1324 aux_array1[j++] = i; 1325 } 1326 } 1327 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1328 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1329 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1330 } 1331 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1340 { 1341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1342 PC_IS *pcis = (PC_IS*)pc->data; 1343 PC pc_temp; 1344 Mat A_RR; 1345 MatReuse reuse; 1346 PetscScalar m_one = -1.0; 1347 PetscReal value; 1348 PetscInt n_D,n_R,ibs,mbs; 1349 PetscBool use_exact,use_exact_reduced,issbaij; 1350 PetscErrorCode ierr; 1351 /* prefixes stuff */ 1352 char dir_prefix[256],neu_prefix[256],str_level[16]; 1353 size_t len; 1354 1355 PetscFunctionBegin; 1356 1357 /* compute prefixes */ 1358 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1359 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1360 if (!pcbddc->current_level) { 1361 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1367 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1368 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1369 len -= 15; /* remove "pc_bddc_coarse_" */ 1370 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1371 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1372 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1374 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1376 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1377 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1378 } 1379 1380 /* DIRICHLET PROBLEM */ 1381 if (dirichlet) { 1382 /* Matrix for Dirichlet problem is pcis->A_II */ 1383 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1384 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1385 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1386 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1387 /* default */ 1388 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1389 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1391 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1392 if (issbaij) { 1393 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1394 } else { 1395 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1396 } 1397 /* Allow user's customization */ 1398 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1399 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1400 } 1401 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1402 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1403 if (!n_D) { 1404 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1405 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1406 } 1407 /* Set Up KSP for Dirichlet problem of BDDC */ 1408 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1409 /* set ksp_D into pcis data */ 1410 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1411 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1412 pcis->ksp_D = pcbddc->ksp_D; 1413 } 1414 1415 /* NEUMANN PROBLEM */ 1416 A_RR = 0; 1417 if (neumann) { 1418 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1419 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1420 if (pcbddc->ksp_R) { /* already created ksp */ 1421 PetscInt nn_R; 1422 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1423 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1424 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1425 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1426 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1427 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1428 reuse = MAT_INITIAL_MATRIX; 1429 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1430 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1431 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1432 reuse = MAT_INITIAL_MATRIX; 1433 } else { /* safe to reuse the matrix */ 1434 reuse = MAT_REUSE_MATRIX; 1435 } 1436 } 1437 /* last check */ 1438 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1439 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1440 reuse = MAT_INITIAL_MATRIX; 1441 } 1442 } else { /* first time, so we need to create the matrix */ 1443 reuse = MAT_INITIAL_MATRIX; 1444 } 1445 /* extract A_RR */ 1446 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1447 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1448 if (ibs != mbs) { 1449 Mat newmat; 1450 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1451 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1452 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1453 } else { 1454 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1455 } 1456 if (!pcbddc->ksp_R) { /* create object if not present */ 1457 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1458 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1459 /* default */ 1460 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1461 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1462 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1464 if (issbaij) { 1465 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1466 } else { 1467 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1468 } 1469 /* Allow user's customization */ 1470 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1471 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1472 } 1473 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1474 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1475 if (!n_R) { 1476 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1477 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1478 } 1479 /* Set Up KSP for Neumann problem of BDDC */ 1480 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1481 } 1482 1483 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1484 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1485 if (pcbddc->dbg_flag) { 1486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1489 } 1490 if (dirichlet) { /* Dirichlet */ 1491 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1492 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1495 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1496 /* need to be adapted? */ 1497 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1498 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1499 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1500 /* print info */ 1501 if (pcbddc->dbg_flag) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 } 1505 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1506 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1507 } 1508 } 1509 if (neumann) { /* Neumann */ 1510 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1511 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1514 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1515 /* need to be adapted? */ 1516 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1517 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1518 /* print info */ 1519 if (pcbddc->dbg_flag) { 1520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1522 } 1523 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1524 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 /* free Neumann problem's matrix */ 1529 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1535 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1536 { 1537 PetscErrorCode ierr; 1538 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1539 1540 PetscFunctionBegin; 1541 if (applytranspose) { 1542 if (pcbddc->local_auxmat1) { 1543 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1545 } 1546 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1547 } else { 1548 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1549 if (pcbddc->local_auxmat1) { 1550 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1551 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1552 } 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1560 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1561 { 1562 PetscErrorCode ierr; 1563 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1564 PC_IS* pcis = (PC_IS*) (pc->data); 1565 const PetscScalar zero = 0.0; 1566 1567 PetscFunctionBegin; 1568 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1569 if (applytranspose) { 1570 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1571 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1572 } else { 1573 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1574 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1575 } 1576 /* start communications from local primal nodes to rhs of coarse solver */ 1577 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1582 /* TODO remove null space when doing multilevel */ 1583 if (pcbddc->coarse_ksp) { 1584 if (applytranspose) { 1585 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1586 } else { 1587 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1588 } 1589 } 1590 1591 /* Local solution on R nodes */ 1592 if (pcis->n) { 1593 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1594 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { 1597 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1599 } 1600 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1601 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1602 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { 1605 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 } 1608 } 1609 1610 /* communications from coarse sol to local primal nodes */ 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 1614 /* Sum contributions from two levels */ 1615 if (applytranspose) { 1616 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1617 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1618 } else { 1619 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1620 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1628 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1629 { 1630 PetscErrorCode ierr; 1631 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1632 PetscScalar *array,*array2; 1633 Vec from,to; 1634 1635 PetscFunctionBegin; 1636 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1637 from = pcbddc->coarse_vec; 1638 to = pcbddc->vec1_P; 1639 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1640 Vec tvec; 1641 PetscInt lsize; 1642 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1643 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1644 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1645 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1646 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1647 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1648 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1649 } 1650 } else { /* from local to global -> put data in coarse right hand side */ 1651 from = pcbddc->vec1_P; 1652 to = pcbddc->coarse_vec; 1653 } 1654 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1660 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1661 { 1662 PetscErrorCode ierr; 1663 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1664 PetscScalar *array,*array2; 1665 Vec from,to; 1666 1667 PetscFunctionBegin; 1668 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1669 from = pcbddc->coarse_vec; 1670 to = pcbddc->vec1_P; 1671 } else { /* from local to global -> put data in coarse right hand side */ 1672 from = pcbddc->vec1_P; 1673 to = pcbddc->coarse_vec; 1674 } 1675 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1676 if (smode == SCATTER_FORWARD) { 1677 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1678 Vec tvec; 1679 PetscInt lsize; 1680 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1681 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1682 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1683 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1684 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1685 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1686 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1687 } 1688 } 1689 PetscFunctionReturn(0); 1690 } 1691 1692 /* uncomment for testing purposes */ 1693 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1696 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1697 { 1698 PetscErrorCode ierr; 1699 PC_IS* pcis = (PC_IS*)(pc->data); 1700 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1701 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1702 /* one and zero */ 1703 PetscScalar one=1.0,zero=0.0; 1704 /* space to store constraints and their local indices */ 1705 PetscScalar *temp_quadrature_constraint; 1706 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1707 /* iterators */ 1708 PetscInt i,j,k,total_counts,temp_start_ptr; 1709 /* stuff to store connected components stored in pcbddc->mat_graph */ 1710 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1711 PetscInt n_ISForFaces,n_ISForEdges; 1712 /* near null space stuff */ 1713 MatNullSpace nearnullsp; 1714 const Vec *nearnullvecs; 1715 Vec *localnearnullsp; 1716 PetscBool nnsp_has_cnst; 1717 PetscInt nnsp_size; 1718 PetscScalar *array; 1719 /* BLAS integers */ 1720 PetscBLASInt lwork,lierr; 1721 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1722 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1723 /* LAPACK working arrays for SVD or POD */ 1724 PetscBool skip_lapack; 1725 PetscScalar *work; 1726 PetscReal *singular_vals; 1727 #if defined(PETSC_USE_COMPLEX) 1728 PetscReal *rwork; 1729 #endif 1730 #if defined(PETSC_MISSING_LAPACK_GESVD) 1731 PetscBLASInt Blas_one_2=1; 1732 PetscScalar *temp_basis,*correlation_mat; 1733 #else 1734 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1735 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1736 #endif 1737 /* reuse */ 1738 PetscInt olocal_primal_size; 1739 PetscInt *oprimal_indices_local_idxs; 1740 /* change of basis */ 1741 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1742 PetscBool boolforchange,qr_needed; 1743 PetscBT touched,change_basis,qr_needed_idx; 1744 /* auxiliary stuff */ 1745 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1746 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1747 PetscScalar *temp_quadrature_constraint_work=NULL; 1748 /* some quantities */ 1749 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1750 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1751 1752 1753 PetscFunctionBegin; 1754 /* Destroy Mat objects computed previously */ 1755 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1756 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1757 /* TODO synch with adaptive selection */ 1758 /* Get index sets for faces, edges and vertices from graph */ 1759 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1760 /* free unneeded index sets */ 1761 if (!pcbddc->use_vertices) { 1762 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1763 } 1764 if (!pcbddc->use_edges) { 1765 for (i=0;i<n_ISForEdges;i++) { 1766 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1767 } 1768 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1769 n_ISForEdges = 0; 1770 } 1771 if (!pcbddc->use_faces) { 1772 for (i=0;i<n_ISForFaces;i++) { 1773 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1774 } 1775 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1776 n_ISForFaces = 0; 1777 } 1778 /* HACKS (the following two blocks of code) */ 1779 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1780 pcbddc->use_change_of_basis = PETSC_TRUE; 1781 if (!ISForEdges) { 1782 pcbddc->use_change_on_faces = PETSC_TRUE; 1783 } 1784 } 1785 if (pcbddc->NullSpace) { 1786 /* use_change_of_basis should be consistent among processors */ 1787 PetscBool tbool[2],gbool[2]; 1788 tbool [0] = pcbddc->use_change_of_basis; 1789 tbool [1] = pcbddc->use_change_on_faces; 1790 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1791 pcbddc->use_change_of_basis = gbool[0]; 1792 pcbddc->use_change_on_faces = gbool[1]; 1793 } 1794 /* print some info */ 1795 if (pcbddc->dbg_flag) { 1796 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1797 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1798 i = 0; 1799 if (ISForVertices) { 1800 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1801 } 1802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1805 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1806 } 1807 1808 if (!pcbddc->adaptive_selection) { 1809 /* check if near null space is attached to global mat */ 1810 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1811 if (nearnullsp) { 1812 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1813 /* remove any stored info */ 1814 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1815 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1816 /* store information for BDDC solver reuse */ 1817 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1818 pcbddc->onearnullspace = nearnullsp; 1819 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1820 for (i=0;i<nnsp_size;i++) { 1821 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1822 } 1823 } else { /* if near null space is not provided BDDC uses constants by default */ 1824 nnsp_size = 0; 1825 nnsp_has_cnst = PETSC_TRUE; 1826 } 1827 /* get max number of constraints on a single cc */ 1828 max_constraints = nnsp_size; 1829 if (nnsp_has_cnst) max_constraints++; 1830 1831 /* 1832 Evaluate maximum storage size needed by the procedure 1833 - temp_indices will contain start index of each constraint stored as follows 1834 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1835 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1836 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1837 */ 1838 total_counts = n_ISForFaces+n_ISForEdges; 1839 total_counts *= max_constraints; 1840 n_vertices = 0; 1841 if (ISForVertices) { 1842 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1843 } 1844 total_counts += n_vertices; 1845 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1846 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1847 total_counts = 0; 1848 max_size_of_constraint = 0; 1849 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1850 if (i<n_ISForEdges) { 1851 used_IS = &ISForEdges[i]; 1852 } else { 1853 used_IS = &ISForFaces[i-n_ISForEdges]; 1854 } 1855 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1856 total_counts += j; 1857 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1858 } 1859 total_counts *= max_constraints; 1860 total_counts += n_vertices; 1861 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1862 /* get local part of global near null space vectors */ 1863 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1864 for (k=0;k<nnsp_size;k++) { 1865 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1866 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 } 1869 1870 /* whether or not to skip lapack calls */ 1871 skip_lapack = PETSC_TRUE; 1872 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1873 1874 /* allocate some auxiliary stuff */ 1875 if (!skip_lapack || pcbddc->use_qr_single) { 1876 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1877 } 1878 1879 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1880 if (!skip_lapack) { 1881 PetscScalar temp_work; 1882 1883 #if defined(PETSC_MISSING_LAPACK_GESVD) 1884 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1885 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1886 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1888 #if defined(PETSC_USE_COMPLEX) 1889 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1890 #endif 1891 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1892 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1894 lwork = -1; 1895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1896 #if !defined(PETSC_USE_COMPLEX) 1897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1898 #else 1899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1900 #endif 1901 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1902 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1903 #else /* on missing GESVD */ 1904 /* SVD */ 1905 PetscInt max_n,min_n; 1906 max_n = max_size_of_constraint; 1907 min_n = max_constraints; 1908 if (max_size_of_constraint < max_constraints) { 1909 min_n = max_size_of_constraint; 1910 max_n = max_constraints; 1911 } 1912 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1913 #if defined(PETSC_USE_COMPLEX) 1914 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1915 #endif 1916 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1917 lwork = -1; 1918 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1920 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1921 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1922 #if !defined(PETSC_USE_COMPLEX) 1923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1924 #else 1925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1926 #endif 1927 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1929 #endif /* on missing GESVD */ 1930 /* Allocate optimal workspace */ 1931 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1932 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1933 } 1934 /* Now we can loop on constraining sets */ 1935 total_counts = 0; 1936 temp_indices[0] = 0; 1937 /* vertices */ 1938 if (ISForVertices) { 1939 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1940 if (nnsp_has_cnst) { /* consider all vertices */ 1941 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1942 for (i=0;i<n_vertices;i++) { 1943 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1944 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1945 total_counts++; 1946 } 1947 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1948 PetscBool used_vertex; 1949 for (i=0;i<n_vertices;i++) { 1950 used_vertex = PETSC_FALSE; 1951 k = 0; 1952 while (!used_vertex && k<nnsp_size) { 1953 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1954 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1955 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1956 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1957 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1958 total_counts++; 1959 used_vertex = PETSC_TRUE; 1960 } 1961 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1962 k++; 1963 } 1964 } 1965 } 1966 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1967 n_vertices = total_counts; 1968 } 1969 1970 /* edges and faces */ 1971 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1972 if (ncc<n_ISForEdges) { 1973 used_IS = &ISForEdges[ncc]; 1974 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1975 } else { 1976 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1977 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1978 } 1979 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1980 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1981 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1982 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1983 /* change of basis should not be performed on local periodic nodes */ 1984 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1985 if (nnsp_has_cnst) { 1986 PetscScalar quad_value; 1987 temp_constraints++; 1988 if (!pcbddc->use_nnsp_true) { 1989 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1990 } else { 1991 quad_value = 1.0; 1992 } 1993 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1994 for (j=0;j<size_of_constraint;j++) { 1995 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1996 } 1997 /* sort by global ordering if using lapack subroutines (not needed!) */ 1998 if (!skip_lapack || pcbddc->use_qr_single) { 1999 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2000 for (j=0;j<size_of_constraint;j++) { 2001 permutation[j]=j; 2002 } 2003 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2004 for (j=0;j<size_of_constraint;j++) { 2005 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2006 } 2007 for (j=0;j<size_of_constraint;j++) { 2008 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2009 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2010 } 2011 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2012 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2013 } 2014 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2015 total_counts++; 2016 } 2017 for (k=0;k<nnsp_size;k++) { 2018 PetscReal real_value; 2019 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2020 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (j=0;j<size_of_constraint;j++) { 2022 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2023 } 2024 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2025 /* check if array is null on the connected component */ 2026 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2027 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2028 if (real_value > 0.0) { /* keep indices and values */ 2029 /* sort by global ordering if using lapack subroutines */ 2030 if (!skip_lapack || pcbddc->use_qr_single) { 2031 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2032 for (j=0;j<size_of_constraint;j++) { 2033 permutation[j]=j; 2034 } 2035 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2036 for (j=0;j<size_of_constraint;j++) { 2037 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2038 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2039 } 2040 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2041 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2042 } 2043 temp_constraints++; 2044 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2045 total_counts++; 2046 } 2047 } 2048 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2049 valid_constraints = temp_constraints; 2050 if (!pcbddc->use_nnsp_true && temp_constraints) { 2051 if (temp_constraints == 1) { /* just normalize the constraint */ 2052 PetscScalar norm; 2053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2054 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2055 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2056 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2057 } else { /* perform SVD */ 2058 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2059 2060 #if defined(PETSC_MISSING_LAPACK_GESVD) 2061 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2062 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2063 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2064 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2065 from that computed using LAPACKgesvd 2066 -> This is due to a different computation of eigenvectors in LAPACKheev 2067 -> The quality of the POD-computed basis will be the same */ 2068 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2069 /* Store upper triangular part of correlation matrix */ 2070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2071 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2072 for (j=0;j<temp_constraints;j++) { 2073 for (k=0;k<j+1;k++) { 2074 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2075 } 2076 } 2077 /* compute eigenvalues and eigenvectors of correlation matrix */ 2078 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2080 #if !defined(PETSC_USE_COMPLEX) 2081 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2082 #else 2083 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2084 #endif 2085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2086 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2087 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2088 j = 0; 2089 while (j < temp_constraints && singular_vals[j] < tol) j++; 2090 total_counts = total_counts-j; 2091 valid_constraints = temp_constraints-j; 2092 /* scale and copy POD basis into used quadrature memory */ 2093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2098 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2099 if (j<temp_constraints) { 2100 PetscInt ii; 2101 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2103 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2104 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2105 for (k=0;k<temp_constraints-j;k++) { 2106 for (ii=0;ii<size_of_constraint;ii++) { 2107 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2108 } 2109 } 2110 } 2111 #else /* on missing GESVD */ 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2115 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2116 #if !defined(PETSC_USE_COMPLEX) 2117 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2118 #else 2119 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2120 #endif 2121 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2123 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2124 k = temp_constraints; 2125 if (k > size_of_constraint) k = size_of_constraint; 2126 j = 0; 2127 while (j < k && singular_vals[k-j-1] < tol) j++; 2128 valid_constraints = k-j; 2129 total_counts = total_counts-temp_constraints+valid_constraints; 2130 #endif /* on missing GESVD */ 2131 } 2132 } 2133 /* setting change_of_basis flag is safe now */ 2134 if (boolforchange) { 2135 for (j=0;j<valid_constraints;j++) { 2136 PetscBTSet(change_basis,total_counts-j-1); 2137 } 2138 } 2139 } 2140 /* free workspace */ 2141 if (!skip_lapack || pcbddc->use_qr_single) { 2142 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2143 } 2144 if (!skip_lapack) { 2145 ierr = PetscFree(work);CHKERRQ(ierr); 2146 #if defined(PETSC_USE_COMPLEX) 2147 ierr = PetscFree(rwork);CHKERRQ(ierr); 2148 #endif 2149 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2150 #if defined(PETSC_MISSING_LAPACK_GESVD) 2151 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2152 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2153 #endif 2154 } 2155 for (k=0;k<nnsp_size;k++) { 2156 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2157 } 2158 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2159 } else { 2160 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2161 PetscInt cum = 0; 2162 2163 total_counts = 0; 2164 n_vertices = 0; 2165 if (sub_schurs->is_Ej_com) { 2166 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2167 } 2168 max_constraints = 0; 2169 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2170 total_counts += pcbddc->adaptive_constraints_n[i]; 2171 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2172 } 2173 temp_indices = pcbddc->adaptive_constraints_ptrs; 2174 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2175 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2176 2177 #if 0 2178 printf("Found %d totals\n",total_counts); 2179 for (i=0;i<total_counts;i++) { 2180 printf("const %d, start %d",i,temp_indices[i]); 2181 printf(" end %d:\n",temp_indices[i+1]); 2182 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2183 printf(" idxs %d",temp_indices_to_constraint[j]); 2184 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2185 } 2186 } 2187 for (i=0;i<n_vertices;i++) { 2188 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2189 } 2190 for (i=0;i<sub_schurs->n_subs;i++) { 2191 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2192 } 2193 #endif 2194 2195 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2196 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2197 /* Change of basis */ 2198 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2199 if (pcbddc->use_change_of_basis) { 2200 cum = n_vertices; 2201 for (i=0;i<sub_schurs->n_subs;i++) { 2202 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2203 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2204 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2205 } 2206 } 2207 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2208 } 2209 } 2210 } 2211 2212 /* free index sets of faces, edges and vertices */ 2213 for (i=0;i<n_ISForFaces;i++) { 2214 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2215 } 2216 if (n_ISForFaces) { 2217 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2218 } 2219 for (i=0;i<n_ISForEdges;i++) { 2220 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2221 } 2222 if (n_ISForEdges) { 2223 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2224 } 2225 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2226 2227 /* map temp_indices_to_constraint in boundary numbering */ 2228 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2229 if (i != temp_indices[total_counts]) { 2230 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2231 } 2232 2233 /* set quantities in pcbddc data structure and store previous primal size */ 2234 /* n_vertices defines the number of subdomain corners in the primal space */ 2235 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2236 olocal_primal_size = pcbddc->local_primal_size; 2237 pcbddc->local_primal_size = total_counts; 2238 pcbddc->n_vertices = n_vertices; 2239 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2240 2241 /* Create constraint matrix */ 2242 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2243 /* so we need to set it up properly either with or without change of basis */ 2244 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2245 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2246 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2247 /* array to compute a local numbering of constraints : vertices first then constraints */ 2248 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2249 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2250 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2251 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2252 /* auxiliary stuff for basis change */ 2253 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2254 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2255 2256 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2257 total_primal_vertices=0; 2258 for (i=0;i<pcbddc->local_primal_size;i++) { 2259 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2260 if (size_of_constraint == 1) { 2261 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2262 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2263 aux_primal_minloc[total_primal_vertices]=0; 2264 total_primal_vertices++; 2265 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2266 PetscInt min_loc,min_index; 2267 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2268 /* find first untouched local node */ 2269 k = 0; 2270 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2271 min_index = global_indices[k]; 2272 min_loc = k; 2273 /* search the minimum among global nodes already untouched on the cc */ 2274 for (k=1;k<size_of_constraint;k++) { 2275 /* there can be more than one constraint on a single connected component */ 2276 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2277 min_index = global_indices[k]; 2278 min_loc = k; 2279 } 2280 } 2281 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2282 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2283 aux_primal_minloc[total_primal_vertices]=min_loc; 2284 total_primal_vertices++; 2285 } 2286 } 2287 /* determine if a QR strategy is needed for change of basis */ 2288 qr_needed = PETSC_FALSE; 2289 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2290 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2291 if (PetscBTLookup(change_basis,i)) { 2292 if (!pcbddc->use_qr_single) { 2293 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2294 j = 0; 2295 for (k=0;k<size_of_constraint;k++) { 2296 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2297 j++; 2298 } 2299 } 2300 /* found more than one primal dof on the cc */ 2301 if (j > 1) { 2302 PetscBTSet(qr_needed_idx,i); 2303 qr_needed = PETSC_TRUE; 2304 } 2305 } else { 2306 PetscBTSet(qr_needed_idx,i); 2307 qr_needed = PETSC_TRUE; 2308 } 2309 } 2310 } 2311 /* free workspace */ 2312 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2313 2314 /* permute indices in order to have a sorted set of vertices */ 2315 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2316 2317 /* nonzero structure of constraint matrix */ 2318 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2319 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2320 j=total_primal_vertices; 2321 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2322 if (!PetscBTLookup(change_basis,i)) { 2323 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2324 j++; 2325 } 2326 } 2327 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2328 ierr = PetscFree(nnz);CHKERRQ(ierr); 2329 /* set values in constraint matrix */ 2330 for (i=0;i<total_primal_vertices;i++) { 2331 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2332 } 2333 total_counts = total_primal_vertices; 2334 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2335 if (!PetscBTLookup(change_basis,i)) { 2336 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2337 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2338 total_counts++; 2339 } 2340 } 2341 /* assembling */ 2342 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 /* 2345 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2346 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2347 */ 2348 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2349 if (pcbddc->use_change_of_basis) { 2350 /* dual and primal dofs on a single cc */ 2351 PetscInt dual_dofs,primal_dofs; 2352 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2353 PetscInt primal_counter; 2354 /* working stuff for GEQRF */ 2355 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2356 PetscBLASInt lqr_work; 2357 /* working stuff for UNGQR */ 2358 PetscScalar *gqr_work,lgqr_work_t; 2359 PetscBLASInt lgqr_work; 2360 /* working stuff for TRTRS */ 2361 PetscScalar *trs_rhs; 2362 PetscBLASInt Blas_NRHS; 2363 /* pointers for values insertion into change of basis matrix */ 2364 PetscInt *start_rows,*start_cols; 2365 PetscScalar *start_vals; 2366 /* working stuff for values insertion */ 2367 PetscBT is_primal; 2368 /* matrix sizes */ 2369 PetscInt global_size,local_size; 2370 /* temporary change of basis */ 2371 Mat localChangeOfBasisMatrix; 2372 2373 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2374 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2375 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2376 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2377 /* nonzeros for local mat */ 2378 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2379 for (i=0;i<pcis->n;i++) nnz[i]=1; 2380 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2381 if (PetscBTLookup(change_basis,i)) { 2382 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2383 if (PetscBTLookup(qr_needed_idx,i)) { 2384 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2385 } else { 2386 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2387 /* get local primal index on the cc */ 2388 j = 0; 2389 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2390 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2391 } 2392 } 2393 } 2394 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2395 ierr = PetscFree(nnz);CHKERRQ(ierr); 2396 /* Set initial identity in the matrix */ 2397 for (i=0;i<pcis->n;i++) { 2398 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2399 } 2400 2401 if (pcbddc->dbg_flag) { 2402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2404 } 2405 2406 2407 /* Now we loop on the constraints which need a change of basis */ 2408 /* 2409 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2410 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2411 2412 Basic blocks of change of basis matrix T computed by 2413 2414 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2415 2416 | 1 0 ... 0 s_1/S | 2417 | 0 1 ... 0 s_2/S | 2418 | ... | 2419 | 0 ... 1 s_{n-1}/S | 2420 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2421 2422 with S = \sum_{i=1}^n s_i^2 2423 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2424 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2425 2426 - QR decomposition of constraints otherwise 2427 */ 2428 if (qr_needed) { 2429 /* space to store Q */ 2430 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2431 /* first we issue queries for optimal work */ 2432 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2433 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2434 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2435 lqr_work = -1; 2436 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2437 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2438 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2439 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2440 lgqr_work = -1; 2441 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2443 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2444 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2445 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2446 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2447 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2448 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2449 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2450 /* array to store scaling factors for reflectors */ 2451 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2452 /* array to store rhs and solution of triangular solver */ 2453 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2454 /* allocating workspace for check */ 2455 if (pcbddc->dbg_flag) { 2456 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2457 } 2458 } 2459 /* array to store whether a node is primal or not */ 2460 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2461 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2462 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2463 if (i != total_primal_vertices) { 2464 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2465 } 2466 for (i=0;i<total_primal_vertices;i++) { 2467 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2468 } 2469 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2470 2471 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2472 /* -> using implicit ordering contained in temp_indices data */ 2473 total_counts = pcbddc->n_vertices; 2474 primal_counter = total_counts; 2475 while (total_counts<pcbddc->local_primal_size) { 2476 primal_dofs = 1; 2477 if (PetscBTLookup(change_basis,total_counts)) { 2478 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2479 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2480 primal_dofs++; 2481 } 2482 /* get constraint info */ 2483 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2484 dual_dofs = size_of_constraint-primal_dofs; 2485 2486 if (pcbddc->dbg_flag) { 2487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2488 } 2489 2490 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2491 2492 /* copy quadrature constraints for change of basis check */ 2493 if (pcbddc->dbg_flag) { 2494 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2495 } 2496 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2497 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2498 2499 /* compute QR decomposition of constraints */ 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2504 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2505 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2507 2508 /* explictly compute R^-T */ 2509 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2510 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2511 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2514 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2516 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2517 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2519 2520 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2523 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2526 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2527 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2528 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2529 2530 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2531 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2532 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2539 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2540 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2543 2544 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2545 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2546 /* insert cols for primal dofs */ 2547 for (j=0;j<primal_dofs;j++) { 2548 start_vals = &qr_basis[j*size_of_constraint]; 2549 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2550 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2551 } 2552 /* insert cols for dual dofs */ 2553 for (j=0,k=0;j<dual_dofs;k++) { 2554 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2555 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2556 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2557 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2558 j++; 2559 } 2560 } 2561 2562 /* check change of basis */ 2563 if (pcbddc->dbg_flag) { 2564 PetscInt ii,jj; 2565 PetscBool valid_qr=PETSC_TRUE; 2566 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2573 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2574 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2575 for (jj=0;jj<size_of_constraint;jj++) { 2576 for (ii=0;ii<primal_dofs;ii++) { 2577 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2578 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2579 } 2580 } 2581 if (!valid_qr) { 2582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2583 for (jj=0;jj<size_of_constraint;jj++) { 2584 for (ii=0;ii<primal_dofs;ii++) { 2585 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2586 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2587 } 2588 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2590 } 2591 } 2592 } 2593 } else { 2594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2595 } 2596 } 2597 } else { /* simple transformation block */ 2598 PetscInt row,col; 2599 PetscScalar val,norm; 2600 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2602 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2603 for (j=0;j<size_of_constraint;j++) { 2604 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2605 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2606 if (!PetscBTLookup(is_primal,row_B)) { 2607 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2608 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2609 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2610 } else { 2611 for (k=0;k<size_of_constraint;k++) { 2612 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2613 if (row != col) { 2614 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2615 } else { 2616 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2617 } 2618 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2619 } 2620 } 2621 } 2622 if (pcbddc->dbg_flag) { 2623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2624 } 2625 } 2626 /* increment primal counter */ 2627 primal_counter += primal_dofs; 2628 } else { 2629 if (pcbddc->dbg_flag) { 2630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2631 } 2632 } 2633 /* increment constraint counter total_counts */ 2634 total_counts += primal_dofs; 2635 } 2636 2637 /* free workspace */ 2638 if (qr_needed) { 2639 if (pcbddc->dbg_flag) { 2640 ierr = PetscFree(work);CHKERRQ(ierr); 2641 } 2642 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2643 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2644 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2645 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2646 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2647 } 2648 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2649 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 2652 /* assembling of global change of variable */ 2653 { 2654 Mat tmat; 2655 PetscInt bs; 2656 2657 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2658 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2659 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2660 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2661 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2662 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2663 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2664 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2665 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2666 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2667 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2668 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2669 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2670 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2671 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2672 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2673 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2674 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2675 } 2676 /* check */ 2677 if (pcbddc->dbg_flag) { 2678 PetscReal error; 2679 Vec x,x_change; 2680 2681 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2682 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2683 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2684 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2685 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2686 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2688 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2690 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2691 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2692 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2695 ierr = VecDestroy(&x);CHKERRQ(ierr); 2696 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2697 } 2698 2699 /* adapt sub_schurs computed (if any) */ 2700 if (pcbddc->use_deluxe_scaling) { 2701 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2702 if (sub_schurs->n_subs_par_g) { 2703 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2704 } 2705 if (sub_schurs->S_Ej_all) { 2706 Mat S_1,S_2,tmat; 2707 IS is_all_N; 2708 2709 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2710 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2711 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2712 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2713 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2714 sub_schurs->S_Ej_all = S_1; 2715 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2716 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2717 sub_schurs->sum_S_Ej_all = S_2; 2718 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2719 } 2720 } 2721 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2722 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2723 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2724 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2725 } 2726 2727 /* set up change of basis context */ 2728 if (pcbddc->ChangeOfBasisMatrix) { 2729 PCBDDCChange_ctx change_ctx; 2730 2731 if (!pcbddc->new_global_mat) { 2732 PetscInt global_size,local_size; 2733 2734 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2735 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2736 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2737 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2738 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2739 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2740 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2741 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2742 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2743 } else { 2744 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2745 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2746 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2747 } 2748 if (!pcbddc->user_ChangeOfBasisMatrix) { 2749 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2750 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2751 } else { 2752 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2753 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2754 } 2755 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2756 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2757 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 } 2760 2761 /* get indices in local ordering for vertices and constraints */ 2762 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2763 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2764 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2765 } 2766 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2767 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2768 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2769 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2771 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2772 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2773 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2774 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2775 /* set quantities in PCBDDC data struct */ 2776 pcbddc->n_actual_vertices = i; 2777 /* check if a new primal space has been introduced */ 2778 pcbddc->new_primal_space_local = PETSC_TRUE; 2779 if (olocal_primal_size == pcbddc->local_primal_size) { 2780 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2781 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2782 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2783 } 2784 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2785 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2786 2787 /* flush dbg viewer */ 2788 if (pcbddc->dbg_flag) { 2789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2790 } 2791 2792 /* free workspace */ 2793 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2794 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2795 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2796 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2797 if (!pcbddc->adaptive_selection) { 2798 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2799 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2800 } else { 2801 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2802 pcbddc->adaptive_constraints_ptrs, 2803 pcbddc->adaptive_constraints_idxs, 2804 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2805 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2806 } 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2812 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2813 { 2814 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2815 PC_IS *pcis = (PC_IS*)pc->data; 2816 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2817 PetscInt ierr,i,vertex_size; 2818 PetscViewer viewer=pcbddc->dbg_viewer; 2819 2820 PetscFunctionBegin; 2821 /* Reset previously computed graph */ 2822 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2823 /* Init local Graph struct */ 2824 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2825 2826 /* Check validity of the csr graph passed in by the user */ 2827 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2828 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2829 } 2830 2831 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2832 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2833 PetscInt *xadj,*adjncy; 2834 PetscInt nvtxs; 2835 2836 if (pcbddc->use_local_adj) { 2837 PetscBool flg_row=PETSC_FALSE; 2838 2839 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2840 if (flg_row) { 2841 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2842 pcbddc->computed_rowadj = PETSC_TRUE; 2843 } 2844 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2845 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2846 IS is_dummy; 2847 ISLocalToGlobalMapping l2gmap_dummy; 2848 PetscInt j,sum; 2849 PetscInt *cxadj,*cadjncy; 2850 const PetscInt *idxs; 2851 PCBDDCGraph graph; 2852 PetscBT is_on_boundary; 2853 2854 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2855 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2856 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2857 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2858 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2859 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2860 graph->xadj = xadj; 2861 graph->adjncy = adjncy; 2862 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2863 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2864 2865 if (pcbddc->dbg_flag) { 2866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2867 for (i=0;i<graph->ncc;i++) { 2868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2869 } 2870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2871 } 2872 2873 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2874 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2875 for (i=0;i<pcis->n_B;i++) { 2876 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2877 } 2878 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2879 2880 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2881 sum = 0; 2882 for (i=0;i<graph->ncc;i++) { 2883 PetscInt sizecc = 0; 2884 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2885 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2886 sizecc++; 2887 } 2888 } 2889 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2890 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2891 cxadj[graph->queue[j]] = sizecc; 2892 } 2893 } 2894 sum += sizecc*sizecc; 2895 } 2896 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2897 sum = 0; 2898 for (i=0;i<nvtxs;i++) { 2899 PetscInt temp = cxadj[i]; 2900 cxadj[i] = sum; 2901 sum += temp; 2902 } 2903 cxadj[nvtxs] = sum; 2904 for (i=0;i<graph->ncc;i++) { 2905 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2906 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2907 PetscInt k,sizecc = 0; 2908 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2909 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2910 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2911 sizecc++; 2912 } 2913 } 2914 } 2915 } 2916 } 2917 if (nvtxs) { 2918 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2919 } else { 2920 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2921 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2922 } 2923 graph->xadj = 0; 2924 graph->adjncy = 0; 2925 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2926 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2927 } 2928 } 2929 2930 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2931 vertex_size = 1; 2932 if (pcbddc->user_provided_isfordofs) { 2933 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2934 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2935 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2936 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2937 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2938 } 2939 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2940 pcbddc->n_ISForDofs = 0; 2941 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2942 } 2943 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2944 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2945 } else { 2946 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2947 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2948 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2949 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2950 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2951 } 2952 } 2953 } 2954 2955 /* Setup of Graph */ 2956 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2957 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2958 } 2959 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2960 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2961 } 2962 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2963 2964 /* Graph's connected components analysis */ 2965 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2966 2967 /* print some info to stdout */ 2968 if (pcbddc->dbg_flag) { 2969 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2970 } 2971 2972 /* mark topography has done */ 2973 pcbddc->recompute_topography = PETSC_FALSE; 2974 PetscFunctionReturn(0); 2975 } 2976 2977 #undef __FUNCT__ 2978 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2979 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2980 { 2981 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2982 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 n = 0; 2987 vertices = 0; 2988 if (pcbddc->ConstraintMatrix) { 2989 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2990 for (i=0;i<local_primal_size;i++) { 2991 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2992 if (size_of_constraint == 1) n++; 2993 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2994 } 2995 if (vertices_idx) { 2996 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2997 n = 0; 2998 for (i=0;i<local_primal_size;i++) { 2999 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3000 if (size_of_constraint == 1) { 3001 vertices[n++]=row_cmat_indices[0]; 3002 } 3003 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3004 } 3005 } 3006 } 3007 *n_vertices = n; 3008 if (vertices_idx) *vertices_idx = vertices; 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3014 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3015 { 3016 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3017 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3018 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3019 PetscBT touched; 3020 PetscErrorCode ierr; 3021 3022 /* This function assumes that the number of local constraints per connected component 3023 is not greater than the number of nodes defined for the connected component 3024 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3025 PetscFunctionBegin; 3026 n = 0; 3027 constraints_index = 0; 3028 if (pcbddc->ConstraintMatrix) { 3029 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3030 max_size_of_constraint = 0; 3031 for (i=0;i<local_primal_size;i++) { 3032 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3033 if (size_of_constraint > 1) { 3034 n++; 3035 } 3036 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3037 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3038 } 3039 if (constraints_idx) { 3040 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3041 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3042 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3043 n = 0; 3044 for (i=0;i<local_primal_size;i++) { 3045 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3046 if (size_of_constraint > 1) { 3047 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3048 /* find first untouched local node */ 3049 j = 0; 3050 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3051 min_index = row_cmat_global_indices[j]; 3052 min_loc = j; 3053 /* search the minimum among nodes not yet touched on the connected component 3054 since there can be more than one constraint on a single cc */ 3055 for (j=1;j<size_of_constraint;j++) { 3056 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3057 min_index = row_cmat_global_indices[j]; 3058 min_loc = j; 3059 } 3060 } 3061 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3062 constraints_index[n++] = row_cmat_indices[min_loc]; 3063 } 3064 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3065 } 3066 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3067 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3068 } 3069 } 3070 *n_constraints = n; 3071 if (constraints_idx) *constraints_idx = constraints_index; 3072 PetscFunctionReturn(0); 3073 } 3074 3075 #undef __FUNCT__ 3076 #define __FUNCT__ "PCBDDCSubsetNumbering" 3077 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3078 { 3079 Vec local_vec,global_vec; 3080 IS seqis,paris; 3081 VecScatter scatter_ctx; 3082 PetscScalar *array; 3083 PetscInt *temp_global_dofs; 3084 PetscScalar globalsum; 3085 PetscInt i,j,s; 3086 PetscInt nlocals,first_index,old_index,max_local; 3087 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3088 PetscMPIInt *dof_sizes,*dof_displs; 3089 PetscBool first_found; 3090 PetscErrorCode ierr; 3091 3092 PetscFunctionBegin; 3093 /* mpi buffers */ 3094 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3095 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3096 j = ( !rank_prec_comm ? size_prec_comm : 0); 3097 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3098 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3099 /* get maximum size of subset */ 3100 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3101 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3102 max_local = 0; 3103 for (i=0;i<n_local_dofs;i++) { 3104 if (max_local < temp_global_dofs[i] ) { 3105 max_local = temp_global_dofs[i]; 3106 } 3107 } 3108 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3109 max_global++; 3110 max_local = 0; 3111 for (i=0;i<n_local_dofs;i++) { 3112 if (max_local < local_dofs[i] ) { 3113 max_local = local_dofs[i]; 3114 } 3115 } 3116 max_local++; 3117 /* allocate workspace */ 3118 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3119 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3120 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3121 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3122 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3123 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3124 /* create scatter */ 3125 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3126 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3127 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3128 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3129 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3130 /* init array */ 3131 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3132 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3133 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3134 if (local_dofs_mult) { 3135 for (i=0;i<n_local_dofs;i++) { 3136 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3137 } 3138 } else { 3139 for (i=0;i<n_local_dofs;i++) { 3140 array[local_dofs[i]]=1.0; 3141 } 3142 } 3143 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3144 /* scatter into global vec and get total number of global dofs */ 3145 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3146 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3147 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3148 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3149 /* Fill global_vec with cumulative function for global numbering */ 3150 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3151 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3152 nlocals = 0; 3153 first_index = -1; 3154 first_found = PETSC_FALSE; 3155 for (i=0;i<s;i++) { 3156 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3157 first_found = PETSC_TRUE; 3158 first_index = i; 3159 } 3160 nlocals += (PetscInt)PetscRealPart(array[i]); 3161 } 3162 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3163 if (!rank_prec_comm) { 3164 dof_displs[0]=0; 3165 for (i=1;i<size_prec_comm;i++) { 3166 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3167 } 3168 } 3169 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3170 if (first_found) { 3171 array[first_index] += (PetscScalar)nlocals; 3172 old_index = first_index; 3173 for (i=first_index+1;i<s;i++) { 3174 if (PetscRealPart(array[i]) > 0.1) { 3175 array[i] += array[old_index]; 3176 old_index = i; 3177 } 3178 } 3179 } 3180 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3181 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3182 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3183 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3184 /* get global ordering of local dofs */ 3185 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3186 if (local_dofs_mult) { 3187 for (i=0;i<n_local_dofs;i++) { 3188 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3189 } 3190 } else { 3191 for (i=0;i<n_local_dofs;i++) { 3192 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3193 } 3194 } 3195 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3196 /* free workspace */ 3197 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3198 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3199 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3200 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3201 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3202 /* return pointer to global ordering of local dofs */ 3203 *global_numbering_subset = temp_global_dofs; 3204 PetscFunctionReturn(0); 3205 } 3206 3207 #undef __FUNCT__ 3208 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3209 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3210 { 3211 PetscInt i,j; 3212 PetscScalar *alphas; 3213 PetscErrorCode ierr; 3214 3215 PetscFunctionBegin; 3216 /* this implements stabilized Gram-Schmidt */ 3217 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3218 for (i=0;i<n;i++) { 3219 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3220 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3221 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3222 } 3223 ierr = PetscFree(alphas);CHKERRQ(ierr); 3224 PetscFunctionReturn(0); 3225 } 3226 3227 #undef __FUNCT__ 3228 #define __FUNCT__ "MatISGetSubassemblingPattern" 3229 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3230 { 3231 Mat subdomain_adj; 3232 IS new_ranks,ranks_send_to; 3233 MatPartitioning partitioner; 3234 Mat_IS *matis; 3235 PetscInt n_neighs,*neighs,*n_shared,**shared; 3236 PetscInt prank; 3237 PetscMPIInt size,rank,color; 3238 PetscInt *xadj,*adjncy,*oldranks; 3239 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3240 PetscInt i,local_size,threshold=0; 3241 PetscErrorCode ierr; 3242 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3243 PetscSubcomm subcomm; 3244 3245 PetscFunctionBegin; 3246 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3247 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3248 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3249 3250 /* Get info on mapping */ 3251 matis = (Mat_IS*)(mat->data); 3252 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3253 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3254 3255 /* build local CSR graph of subdomains' connectivity */ 3256 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3257 xadj[0] = 0; 3258 xadj[1] = PetscMax(n_neighs-1,0); 3259 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3260 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3261 3262 if (threshold) { 3263 PetscInt xadj_count = 0; 3264 for (i=1;i<n_neighs;i++) { 3265 if (n_shared[i] > threshold) { 3266 adjncy[xadj_count] = neighs[i]; 3267 adjncy_wgt[xadj_count] = n_shared[i]; 3268 xadj_count++; 3269 } 3270 } 3271 xadj[1] = xadj_count; 3272 } else { 3273 if (xadj[1]) { 3274 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3275 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3276 } 3277 } 3278 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3279 if (use_square) { 3280 for (i=0;i<xadj[1];i++) { 3281 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3282 } 3283 } 3284 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3285 3286 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3287 3288 /* 3289 Restrict work on active processes only. 3290 */ 3291 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3292 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3293 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3294 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3295 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3296 if (color) { 3297 ierr = PetscFree(xadj);CHKERRQ(ierr); 3298 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3299 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3300 } else { 3301 PetscInt coarsening_ratio; 3302 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3303 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3304 prank = rank; 3305 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3306 /* 3307 for (i=0;i<size;i++) { 3308 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3309 } 3310 */ 3311 for (i=0;i<xadj[1];i++) { 3312 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3313 } 3314 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3315 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3316 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3317 3318 /* Partition */ 3319 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3320 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3321 if (use_vwgt) { 3322 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3323 v_wgt[0] = local_size; 3324 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3325 } 3326 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3327 coarsening_ratio = size/n_subdomains; 3328 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3329 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3330 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3331 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3332 3333 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3334 if (contiguous) { 3335 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3336 } else { 3337 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3338 } 3339 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3340 /* clean up */ 3341 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3342 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3343 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3344 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3345 } 3346 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3347 3348 /* assemble parallel IS for sends */ 3349 i = 1; 3350 if (color) i=0; 3351 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3352 3353 /* get back IS */ 3354 *is_sends = ranks_send_to; 3355 PetscFunctionReturn(0); 3356 } 3357 3358 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3359 3360 #undef __FUNCT__ 3361 #define __FUNCT__ "MatISSubassemble" 3362 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3363 { 3364 Mat local_mat; 3365 Mat_IS *matis; 3366 IS is_sends_internal; 3367 PetscInt rows,cols,new_local_rows; 3368 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3369 PetscBool ismatis,isdense,newisdense,destroy_mat; 3370 ISLocalToGlobalMapping l2gmap; 3371 PetscInt* l2gmap_indices; 3372 const PetscInt* is_indices; 3373 MatType new_local_type; 3374 /* buffers */ 3375 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3376 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3377 PetscInt *recv_buffer_idxs_local; 3378 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3379 /* MPI */ 3380 MPI_Comm comm,comm_n; 3381 PetscSubcomm subcomm; 3382 PetscMPIInt n_sends,n_recvs,commsize; 3383 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3384 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3385 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3386 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3387 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3388 PetscErrorCode ierr; 3389 3390 PetscFunctionBegin; 3391 /* TODO: add missing checks */ 3392 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3393 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3394 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3395 PetscValidLogicalCollectiveInt(mat,nis,7); 3396 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3397 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3398 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3399 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3400 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3401 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3402 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3403 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3404 PetscInt mrows,mcols,mnrows,mncols; 3405 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3406 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3407 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3408 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3409 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3410 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3411 } 3412 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3413 PetscValidLogicalCollectiveInt(mat,bs,0); 3414 /* prepare IS for sending if not provided */ 3415 if (!is_sends) { 3416 PetscBool pcontig = PETSC_TRUE; 3417 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3418 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3419 } else { 3420 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3421 is_sends_internal = is_sends; 3422 } 3423 3424 /* get pointer of MATIS data */ 3425 matis = (Mat_IS*)mat->data; 3426 3427 /* get comm */ 3428 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3429 3430 /* compute number of sends */ 3431 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3432 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3433 3434 /* compute number of receives */ 3435 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3436 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3437 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3438 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3439 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3440 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3441 ierr = PetscFree(iflags);CHKERRQ(ierr); 3442 3443 /* restrict comm if requested */ 3444 subcomm = 0; 3445 destroy_mat = PETSC_FALSE; 3446 if (restrict_comm) { 3447 PetscMPIInt color,subcommsize; 3448 3449 color = 0; 3450 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3451 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3452 subcommsize = commsize - subcommsize; 3453 /* check if reuse has been requested */ 3454 if (reuse == MAT_REUSE_MATRIX) { 3455 if (*mat_n) { 3456 PetscMPIInt subcommsize2; 3457 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3458 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3459 comm_n = PetscObjectComm((PetscObject)*mat_n); 3460 } else { 3461 comm_n = PETSC_COMM_SELF; 3462 } 3463 } else { /* MAT_INITIAL_MATRIX */ 3464 PetscMPIInt rank; 3465 3466 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3467 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3468 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3469 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3470 comm_n = subcomm->comm; 3471 } 3472 /* flag to destroy *mat_n if not significative */ 3473 if (color) destroy_mat = PETSC_TRUE; 3474 } else { 3475 comm_n = comm; 3476 } 3477 3478 /* prepare send/receive buffers */ 3479 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3480 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3481 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3482 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3483 if (nis) { 3484 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3485 } 3486 3487 /* Get data from local matrices */ 3488 if (!isdense) { 3489 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3490 /* TODO: See below some guidelines on how to prepare the local buffers */ 3491 /* 3492 send_buffer_vals should contain the raw values of the local matrix 3493 send_buffer_idxs should contain: 3494 - MatType_PRIVATE type 3495 - PetscInt size_of_l2gmap 3496 - PetscInt global_row_indices[size_of_l2gmap] 3497 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3498 */ 3499 } else { 3500 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3501 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3502 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3503 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3504 send_buffer_idxs[1] = i; 3505 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3506 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3507 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3508 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3509 for (i=0;i<n_sends;i++) { 3510 ilengths_vals[is_indices[i]] = len*len; 3511 ilengths_idxs[is_indices[i]] = len+2; 3512 } 3513 } 3514 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3515 /* additional is (if any) */ 3516 if (nis) { 3517 PetscMPIInt psum; 3518 PetscInt j; 3519 for (j=0,psum=0;j<nis;j++) { 3520 PetscInt plen; 3521 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3522 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3523 psum += len+1; /* indices + lenght */ 3524 } 3525 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3526 for (j=0,psum=0;j<nis;j++) { 3527 PetscInt plen; 3528 const PetscInt *is_array_idxs; 3529 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3530 send_buffer_idxs_is[psum] = plen; 3531 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3532 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3533 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3534 psum += plen+1; /* indices + lenght */ 3535 } 3536 for (i=0;i<n_sends;i++) { 3537 ilengths_idxs_is[is_indices[i]] = psum; 3538 } 3539 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3540 } 3541 3542 buf_size_idxs = 0; 3543 buf_size_vals = 0; 3544 buf_size_idxs_is = 0; 3545 for (i=0;i<n_recvs;i++) { 3546 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3547 buf_size_vals += (PetscInt)olengths_vals[i]; 3548 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3549 } 3550 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3551 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3552 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3553 3554 /* get new tags for clean communications */ 3555 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3556 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3557 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3558 3559 /* allocate for requests */ 3560 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3561 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3562 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3563 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3564 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3565 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3566 3567 /* communications */ 3568 ptr_idxs = recv_buffer_idxs; 3569 ptr_vals = recv_buffer_vals; 3570 ptr_idxs_is = recv_buffer_idxs_is; 3571 for (i=0;i<n_recvs;i++) { 3572 source_dest = onodes[i]; 3573 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3574 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3575 ptr_idxs += olengths_idxs[i]; 3576 ptr_vals += olengths_vals[i]; 3577 if (nis) { 3578 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3579 ptr_idxs_is += olengths_idxs_is[i]; 3580 } 3581 } 3582 for (i=0;i<n_sends;i++) { 3583 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3584 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3585 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3586 if (nis) { 3587 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3588 } 3589 } 3590 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3591 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3592 3593 /* assemble new l2g map */ 3594 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3595 ptr_idxs = recv_buffer_idxs; 3596 new_local_rows = 0; 3597 for (i=0;i<n_recvs;i++) { 3598 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3599 ptr_idxs += olengths_idxs[i]; 3600 } 3601 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3602 ptr_idxs = recv_buffer_idxs; 3603 new_local_rows = 0; 3604 for (i=0;i<n_recvs;i++) { 3605 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3606 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3607 ptr_idxs += olengths_idxs[i]; 3608 } 3609 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3610 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3611 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3612 3613 /* infer new local matrix type from received local matrices type */ 3614 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3615 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3616 if (n_recvs) { 3617 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3618 ptr_idxs = recv_buffer_idxs; 3619 for (i=0;i<n_recvs;i++) { 3620 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3621 new_local_type_private = MATAIJ_PRIVATE; 3622 break; 3623 } 3624 ptr_idxs += olengths_idxs[i]; 3625 } 3626 switch (new_local_type_private) { 3627 case MATDENSE_PRIVATE: 3628 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3629 new_local_type = MATSEQAIJ; 3630 bs = 1; 3631 } else { /* if I receive only 1 dense matrix */ 3632 new_local_type = MATSEQDENSE; 3633 bs = 1; 3634 } 3635 break; 3636 case MATAIJ_PRIVATE: 3637 new_local_type = MATSEQAIJ; 3638 bs = 1; 3639 break; 3640 case MATBAIJ_PRIVATE: 3641 new_local_type = MATSEQBAIJ; 3642 break; 3643 case MATSBAIJ_PRIVATE: 3644 new_local_type = MATSEQSBAIJ; 3645 break; 3646 default: 3647 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3648 break; 3649 } 3650 } else { /* by default, new_local_type is seqdense */ 3651 new_local_type = MATSEQDENSE; 3652 bs = 1; 3653 } 3654 3655 /* create MATIS object if needed */ 3656 if (reuse == MAT_INITIAL_MATRIX) { 3657 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3658 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3659 } else { 3660 /* it also destroys the local matrices */ 3661 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3662 } 3663 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3664 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3665 3666 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3667 3668 /* Global to local map of received indices */ 3669 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3670 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3671 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3672 3673 /* restore attributes -> type of incoming data and its size */ 3674 buf_size_idxs = 0; 3675 for (i=0;i<n_recvs;i++) { 3676 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3677 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3678 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3679 } 3680 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3681 3682 /* set preallocation */ 3683 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3684 if (!newisdense) { 3685 PetscInt *new_local_nnz=0; 3686 3687 ptr_vals = recv_buffer_vals; 3688 ptr_idxs = recv_buffer_idxs_local; 3689 if (n_recvs) { 3690 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3691 } 3692 for (i=0;i<n_recvs;i++) { 3693 PetscInt j; 3694 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3695 for (j=0;j<*(ptr_idxs+1);j++) { 3696 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3697 } 3698 } else { 3699 /* TODO */ 3700 } 3701 ptr_idxs += olengths_idxs[i]; 3702 } 3703 if (new_local_nnz) { 3704 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3705 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3706 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3707 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3708 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3709 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3710 } else { 3711 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3712 } 3713 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3714 } else { 3715 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3716 } 3717 3718 /* set values */ 3719 ptr_vals = recv_buffer_vals; 3720 ptr_idxs = recv_buffer_idxs_local; 3721 for (i=0;i<n_recvs;i++) { 3722 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3723 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3724 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3725 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3726 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3727 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3728 } else { 3729 /* TODO */ 3730 } 3731 ptr_idxs += olengths_idxs[i]; 3732 ptr_vals += olengths_vals[i]; 3733 } 3734 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3735 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3736 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3737 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3738 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3739 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3740 3741 #if 0 3742 if (!restrict_comm) { /* check */ 3743 Vec lvec,rvec; 3744 PetscReal infty_error; 3745 3746 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3747 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3748 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3749 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3750 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3751 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3752 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3753 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3754 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3755 } 3756 #endif 3757 3758 /* assemble new additional is (if any) */ 3759 if (nis) { 3760 PetscInt **temp_idxs,*count_is,j,psum; 3761 3762 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3763 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3764 ptr_idxs = recv_buffer_idxs_is; 3765 psum = 0; 3766 for (i=0;i<n_recvs;i++) { 3767 for (j=0;j<nis;j++) { 3768 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3769 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3770 psum += plen; 3771 ptr_idxs += plen+1; /* shift pointer to received data */ 3772 } 3773 } 3774 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3775 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3776 for (i=1;i<nis;i++) { 3777 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3778 } 3779 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3780 ptr_idxs = recv_buffer_idxs_is; 3781 for (i=0;i<n_recvs;i++) { 3782 for (j=0;j<nis;j++) { 3783 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3784 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3785 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3786 ptr_idxs += plen+1; /* shift pointer to received data */ 3787 } 3788 } 3789 for (i=0;i<nis;i++) { 3790 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3791 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3792 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3793 } 3794 ierr = PetscFree(count_is);CHKERRQ(ierr); 3795 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3796 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3797 } 3798 /* free workspace */ 3799 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3800 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3801 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3802 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3803 if (isdense) { 3804 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3805 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3806 } else { 3807 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3808 } 3809 if (nis) { 3810 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3811 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3812 } 3813 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3814 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3815 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3816 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3817 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3818 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3819 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3820 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3821 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3822 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3823 ierr = PetscFree(onodes);CHKERRQ(ierr); 3824 if (nis) { 3825 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3826 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3827 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3828 } 3829 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3830 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3831 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3832 for (i=0;i<nis;i++) { 3833 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3834 } 3835 } 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /* temporary hack into ksp private data structure */ 3840 #include <petsc-private/kspimpl.h> 3841 3842 #undef __FUNCT__ 3843 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3844 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3845 { 3846 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3847 PC_IS *pcis = (PC_IS*)pc->data; 3848 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3849 MatNullSpace CoarseNullSpace=NULL; 3850 ISLocalToGlobalMapping coarse_islg; 3851 IS coarse_is,*isarray; 3852 PetscInt i,im_active=-1,active_procs=-1; 3853 PetscInt nis,nisdofs,nisneu; 3854 PC pc_temp; 3855 PCType coarse_pc_type; 3856 KSPType coarse_ksp_type; 3857 PetscBool multilevel_requested,multilevel_allowed; 3858 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3859 Mat t_coarse_mat_is; 3860 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3861 PetscMPIInt all_procs; 3862 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3863 PetscBool compute_vecs = PETSC_FALSE; 3864 PetscScalar *array; 3865 PetscErrorCode ierr; 3866 3867 PetscFunctionBegin; 3868 /* Assign global numbering to coarse dofs */ 3869 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3870 compute_vecs = PETSC_TRUE; 3871 PetscInt ocoarse_size; 3872 ocoarse_size = pcbddc->coarse_size; 3873 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3874 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3875 /* see if we can avoid some work */ 3876 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3877 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3878 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3879 coarse_reuse = PETSC_FALSE; 3880 } else { /* we can safely reuse already computed coarse matrix */ 3881 coarse_reuse = PETSC_TRUE; 3882 } 3883 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3884 coarse_reuse = PETSC_FALSE; 3885 } 3886 /* reset any subassembling information */ 3887 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3888 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3889 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3890 coarse_reuse = PETSC_TRUE; 3891 } 3892 3893 /* count "active" (i.e. with positive local size) and "void" processes */ 3894 im_active = !!(pcis->n); 3895 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3896 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3897 void_procs = all_procs-active_procs; 3898 csin_type_simple = PETSC_TRUE; 3899 redist = PETSC_FALSE; 3900 if (pcbddc->current_level && void_procs) { 3901 csin_ml = PETSC_TRUE; 3902 ncoarse_ml = void_procs; 3903 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3904 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3905 csin_ds = PETSC_TRUE; 3906 ncoarse_ds = pcbddc->redistribute_coarse; 3907 redist = PETSC_TRUE; 3908 } else { 3909 csin_ds = PETSC_TRUE; 3910 ncoarse_ds = active_procs; 3911 redist = PETSC_TRUE; 3912 } 3913 } else { 3914 csin_ml = PETSC_FALSE; 3915 ncoarse_ml = all_procs; 3916 if (void_procs) { 3917 csin_ds = PETSC_TRUE; 3918 ncoarse_ds = void_procs; 3919 csin_type_simple = PETSC_FALSE; 3920 } else { 3921 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3922 csin_ds = PETSC_TRUE; 3923 ncoarse_ds = pcbddc->redistribute_coarse; 3924 redist = PETSC_TRUE; 3925 } else { 3926 csin_ds = PETSC_FALSE; 3927 ncoarse_ds = all_procs; 3928 } 3929 } 3930 } 3931 3932 /* 3933 test if we can go multilevel: three conditions must be satisfied: 3934 - we have not exceeded the number of levels requested 3935 - we can actually subassemble the active processes 3936 - we can find a suitable number of MPI processes where we can place the subassembled problem 3937 */ 3938 multilevel_allowed = PETSC_FALSE; 3939 multilevel_requested = PETSC_FALSE; 3940 if (pcbddc->current_level < pcbddc->max_levels) { 3941 multilevel_requested = PETSC_TRUE; 3942 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3943 multilevel_allowed = PETSC_FALSE; 3944 } else { 3945 multilevel_allowed = PETSC_TRUE; 3946 } 3947 } 3948 /* determine number of process partecipating to coarse solver */ 3949 if (multilevel_allowed) { 3950 ncoarse = ncoarse_ml; 3951 csin = csin_ml; 3952 } else { 3953 ncoarse = ncoarse_ds; 3954 csin = csin_ds; 3955 } 3956 3957 /* creates temporary l2gmap and IS for coarse indexes */ 3958 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3959 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3960 3961 /* creates temporary MATIS object for coarse matrix */ 3962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3963 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3964 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3965 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3966 #if 0 3967 { 3968 PetscViewer viewer; 3969 char filename[256]; 3970 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3971 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3972 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3973 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3974 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3975 } 3976 #endif 3977 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3978 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3979 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3980 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3981 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3982 3983 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3984 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3985 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3986 const PetscInt *idxs; 3987 ISLocalToGlobalMapping tmap; 3988 3989 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3990 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3991 /* allocate space for temporary storage */ 3992 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3993 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3994 /* allocate for IS array */ 3995 nisdofs = pcbddc->n_ISForDofsLocal; 3996 nisneu = !!pcbddc->NeumannBoundariesLocal; 3997 nis = nisdofs + nisneu; 3998 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3999 /* dofs splitting */ 4000 for (i=0;i<nisdofs;i++) { 4001 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4002 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4003 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4004 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4005 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4006 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4007 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4008 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4009 } 4010 /* neumann boundaries */ 4011 if (pcbddc->NeumannBoundariesLocal) { 4012 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4013 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4014 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4015 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4016 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4017 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4018 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4019 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4020 } 4021 /* free memory */ 4022 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4023 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4024 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4025 } else { 4026 nis = 0; 4027 nisdofs = 0; 4028 nisneu = 0; 4029 isarray = NULL; 4030 } 4031 /* destroy no longer needed map */ 4032 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4033 4034 /* restrict on coarse candidates (if needed) */ 4035 coarse_mat_is = NULL; 4036 if (csin) { 4037 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4038 if (redist) { 4039 PetscMPIInt rank; 4040 PetscInt spc,n_spc_p1,dest[1],destsize; 4041 4042 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4043 spc = active_procs/pcbddc->redistribute_coarse; 4044 n_spc_p1 = active_procs%pcbddc->redistribute_coarse; 4045 if (im_active) { 4046 destsize = 1; 4047 if (rank > n_spc_p1*(spc+1)-1) { 4048 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4049 } else { 4050 dest[0] = rank/(spc+1); 4051 } 4052 } else { 4053 destsize = 0; 4054 } 4055 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4056 } else if (csin_type_simple) { 4057 PetscMPIInt rank; 4058 PetscInt issize,isidx; 4059 4060 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4061 if (im_active) { 4062 issize = 1; 4063 isidx = (PetscInt)rank; 4064 } else { 4065 issize = 0; 4066 isidx = -1; 4067 } 4068 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4069 } else { /* get a suitable subassembling pattern from MATIS code */ 4070 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4071 } 4072 4073 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4074 if (!redist || ncoarse <= void_procs) { 4075 PetscInt ncoarse_cand,tissize,*nisindices; 4076 PetscInt *coarse_candidates; 4077 const PetscInt* tisindices; 4078 4079 /* get coarse candidates' ranks in pc communicator */ 4080 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4081 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4082 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4083 if (!coarse_candidates[i]) { 4084 coarse_candidates[ncoarse_cand++]=i; 4085 } 4086 } 4087 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4088 4089 4090 if (pcbddc->dbg_flag) { 4091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4093 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4095 for (i=0;i<ncoarse_cand;i++) { 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4097 } 4098 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4099 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4100 } 4101 /* shift the pattern on coarse candidates */ 4102 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4103 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4104 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4105 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4106 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4107 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4108 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4109 } 4110 if (pcbddc->dbg_flag) { 4111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4113 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4115 } 4116 } 4117 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4118 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4119 } else { 4120 if (pcbddc->dbg_flag) { 4121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4122 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4124 } 4125 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4126 coarse_mat_is = t_coarse_mat_is; 4127 } 4128 4129 /* create local to global scatters for coarse problem */ 4130 if (compute_vecs) { 4131 PetscInt lrows; 4132 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4133 if (coarse_mat_is) { 4134 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4135 } else { 4136 lrows = 0; 4137 } 4138 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4139 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4140 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4141 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4142 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4143 } 4144 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4145 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4146 4147 /* set defaults for coarse KSP and PC */ 4148 if (multilevel_allowed) { 4149 coarse_ksp_type = KSPRICHARDSON; 4150 coarse_pc_type = PCBDDC; 4151 } else { 4152 coarse_ksp_type = KSPPREONLY; 4153 coarse_pc_type = PCREDUNDANT; 4154 } 4155 4156 /* print some info if requested */ 4157 if (pcbddc->dbg_flag) { 4158 if (!multilevel_allowed) { 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4160 if (multilevel_requested) { 4161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4162 } else if (pcbddc->max_levels) { 4163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4164 } 4165 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4166 } 4167 } 4168 4169 /* create the coarse KSP object only once with defaults */ 4170 if (coarse_mat_is) { 4171 MatReuse coarse_mat_reuse; 4172 PetscViewer dbg_viewer = NULL; 4173 if (pcbddc->dbg_flag) { 4174 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4175 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4176 } 4177 if (!pcbddc->coarse_ksp) { 4178 char prefix[256],str_level[16]; 4179 size_t len; 4180 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4181 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4182 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4183 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4184 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4185 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4186 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4187 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4188 /* prefix */ 4189 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4190 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4191 if (!pcbddc->current_level) { 4192 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4193 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4194 } else { 4195 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4196 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4197 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4198 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4199 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4200 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4201 } 4202 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4203 /* allow user customization */ 4204 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4205 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4206 } 4207 4208 /* get some info after set from options */ 4209 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4211 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4212 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4213 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4214 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4215 isbddc = PETSC_FALSE; 4216 } 4217 if (isredundant) { 4218 KSP inner_ksp; 4219 PC inner_pc; 4220 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4221 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4222 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4223 } 4224 4225 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4226 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4227 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4228 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4229 if (nisdofs) { 4230 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4231 for (i=0;i<nisdofs;i++) { 4232 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4233 } 4234 } 4235 if (nisneu) { 4236 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4237 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4238 } 4239 4240 /* assemble coarse matrix */ 4241 if (coarse_reuse) { 4242 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4243 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4244 coarse_mat_reuse = MAT_REUSE_MATRIX; 4245 } else { 4246 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4247 } 4248 if (isbddc || isnn) { 4249 if (pcbddc->coarsening_ratio > 1) { 4250 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4251 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4252 if (pcbddc->dbg_flag) { 4253 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4254 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4255 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4256 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4257 } 4258 } 4259 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4260 } else { 4261 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4262 coarse_mat = coarse_mat_is; 4263 } 4264 } else { 4265 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4266 } 4267 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4268 4269 /* propagate symmetry info to coarse matrix */ 4270 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4271 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4272 4273 /* set operators */ 4274 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4275 if (pcbddc->dbg_flag) { 4276 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4277 } 4278 } else { /* processes non partecipating to coarse solver (if any) */ 4279 coarse_mat = 0; 4280 } 4281 ierr = PetscFree(isarray);CHKERRQ(ierr); 4282 #if 0 4283 { 4284 PetscViewer viewer; 4285 char filename[256]; 4286 sprintf(filename,"coarse_mat.m"); 4287 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4288 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4289 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4290 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4291 } 4292 #endif 4293 4294 /* Compute coarse null space (special handling by BDDC only) */ 4295 if (pcbddc->NullSpace) { 4296 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4297 } 4298 4299 if (pcbddc->coarse_ksp) { 4300 Vec crhs,csol; 4301 PetscBool ispreonly; 4302 if (CoarseNullSpace) { 4303 if (isbddc) { 4304 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4305 } else { 4306 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4307 } 4308 } 4309 /* setup coarse ksp */ 4310 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4311 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4312 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4313 /* hack */ 4314 if (!csol) { 4315 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4316 } 4317 if (!crhs) { 4318 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4319 } 4320 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4321 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4322 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4323 KSP check_ksp; 4324 KSPType check_ksp_type; 4325 PC check_pc; 4326 Vec check_vec,coarse_vec; 4327 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4328 PetscInt its; 4329 PetscBool compute_eigs; 4330 PetscReal *eigs_r,*eigs_c; 4331 PetscInt neigs; 4332 const char *prefix; 4333 4334 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4335 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4336 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4337 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4338 if (ispreonly) { 4339 check_ksp_type = KSPPREONLY; 4340 compute_eigs = PETSC_FALSE; 4341 } else { 4342 check_ksp_type = KSPGMRES; 4343 compute_eigs = PETSC_TRUE; 4344 } 4345 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4346 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4347 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4348 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4349 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4350 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4351 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4352 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4353 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4354 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4355 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4356 /* create random vec */ 4357 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4358 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4359 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4360 if (CoarseNullSpace) { 4361 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4362 } 4363 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4364 /* solve coarse problem */ 4365 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4366 if (CoarseNullSpace) { 4367 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4368 } 4369 /* set eigenvalue estimation if preonly has not been requested */ 4370 if (compute_eigs) { 4371 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4372 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4373 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4374 lambda_max = eigs_r[neigs-1]; 4375 lambda_min = eigs_r[0]; 4376 if (pcbddc->use_coarse_estimates) { 4377 if (lambda_max>lambda_min) { 4378 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4379 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4380 } 4381 } 4382 } 4383 4384 /* check coarse problem residual error */ 4385 if (pcbddc->dbg_flag) { 4386 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4387 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4388 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4389 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4390 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4391 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4392 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4393 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4394 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4395 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4397 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4398 if (compute_eigs) { 4399 PetscReal lambda_max_s,lambda_min_s; 4400 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4401 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4402 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4404 for (i=0;i<neigs;i++) { 4405 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4406 } 4407 } 4408 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4409 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4410 } 4411 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4412 if (compute_eigs) { 4413 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4414 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4415 } 4416 } 4417 } 4418 /* print additional info */ 4419 if (pcbddc->dbg_flag) { 4420 /* waits until all processes reaches this point */ 4421 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4422 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4423 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4424 } 4425 4426 /* free memory */ 4427 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4428 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4434 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4435 { 4436 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4437 PC_IS* pcis = (PC_IS*)pc->data; 4438 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4439 PetscInt i,coarse_size; 4440 PetscInt *local_primal_indices; 4441 PetscErrorCode ierr; 4442 4443 PetscFunctionBegin; 4444 /* Compute global number of coarse dofs */ 4445 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4446 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4447 } 4448 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4449 4450 /* check numbering */ 4451 if (pcbddc->dbg_flag) { 4452 PetscScalar coarsesum,*array; 4453 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4454 4455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4457 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4458 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4459 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4460 for (i=0;i<pcbddc->local_primal_size;i++) { 4461 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4462 } 4463 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4464 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4465 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4466 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4467 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4468 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4469 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4470 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4471 for (i=0;i<pcis->n;i++) { 4472 if (array[i] == 1.0) { 4473 set_error = PETSC_TRUE; 4474 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4475 } 4476 } 4477 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4478 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4479 for (i=0;i<pcis->n;i++) { 4480 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4481 } 4482 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4483 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4484 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4485 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4486 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4488 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4490 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4491 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4492 for (i=0;i<pcbddc->local_primal_size;i++) { 4493 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4494 } 4495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4496 } 4497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4498 if (set_error_reduced) { 4499 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4500 } 4501 } 4502 /* get back data */ 4503 *coarse_size_n = coarse_size; 4504 *local_primal_indices_n = local_primal_indices; 4505 PetscFunctionReturn(0); 4506 } 4507 4508 #undef __FUNCT__ 4509 #define __FUNCT__ "PCBDDCGlobalToLocal" 4510 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4511 { 4512 IS localis_t; 4513 PetscInt i,lsize,*idxs,n; 4514 PetscScalar *vals; 4515 PetscErrorCode ierr; 4516 4517 PetscFunctionBegin; 4518 /* get indices in local ordering exploiting local to global map */ 4519 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4520 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4521 for (i=0;i<lsize;i++) vals[i] = 1.0; 4522 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4523 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4524 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4525 if (idxs) { /* multilevel guard */ 4526 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4527 } 4528 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4529 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4530 ierr = PetscFree(vals);CHKERRQ(ierr); 4531 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4532 /* now compute set in local ordering */ 4533 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4534 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4535 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4536 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4537 for (i=0,lsize=0;i<n;i++) { 4538 if (PetscRealPart(vals[i]) > 0.5) { 4539 lsize++; 4540 } 4541 } 4542 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4543 for (i=0,lsize=0;i<n;i++) { 4544 if (PetscRealPart(vals[i]) > 0.5) { 4545 idxs[lsize++] = i; 4546 } 4547 } 4548 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4549 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4550 *localis = localis_t; 4551 PetscFunctionReturn(0); 4552 } 4553 4554 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4555 #undef __FUNCT__ 4556 #define __FUNCT__ "PCBDDCMatMult_Private" 4557 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4558 { 4559 PCBDDCChange_ctx change_ctx; 4560 PetscErrorCode ierr; 4561 4562 PetscFunctionBegin; 4563 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4564 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4565 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4566 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4567 PetscFunctionReturn(0); 4568 } 4569 4570 #undef __FUNCT__ 4571 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4572 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4573 { 4574 PCBDDCChange_ctx change_ctx; 4575 PetscErrorCode ierr; 4576 4577 PetscFunctionBegin; 4578 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4579 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4580 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4581 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4582 PetscFunctionReturn(0); 4583 } 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4587 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4588 { 4589 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4590 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4591 PetscInt *used_xadj,*used_adjncy; 4592 PetscBool free_used_adj; 4593 PetscErrorCode ierr; 4594 4595 PetscFunctionBegin; 4596 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4597 free_used_adj = PETSC_FALSE; 4598 if (pcbddc->sub_schurs_layers == -1) { 4599 used_xadj = NULL; 4600 used_adjncy = NULL; 4601 } else { 4602 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4603 used_xadj = pcbddc->mat_graph->xadj; 4604 used_adjncy = pcbddc->mat_graph->adjncy; 4605 } else if (pcbddc->computed_rowadj) { 4606 used_xadj = pcbddc->mat_graph->xadj; 4607 used_adjncy = pcbddc->mat_graph->adjncy; 4608 } else { 4609 PetscBool flg_row=PETSC_FALSE; 4610 const PetscInt *xadj,*adjncy; 4611 PetscInt nvtxs; 4612 4613 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4614 if (flg_row) { 4615 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4616 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4617 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4618 free_used_adj = PETSC_TRUE; 4619 } else { 4620 pcbddc->sub_schurs_layers = -1; 4621 used_xadj = NULL; 4622 used_adjncy = NULL; 4623 } 4624 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4625 } 4626 } 4627 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4628 4629 /* free adjacency */ 4630 if (free_used_adj) { 4631 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4632 } 4633 PetscFunctionReturn(0); 4634 } 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "PCBDDCInitSubSchurs" 4638 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4639 { 4640 PC_IS *pcis=(PC_IS*)pc->data; 4641 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4642 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4643 PCBDDCGraph graph; 4644 Mat S_j; 4645 PetscErrorCode ierr; 4646 4647 PetscFunctionBegin; 4648 /* attach interface graph for determining subsets */ 4649 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4650 IS verticesIS; 4651 4652 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4655 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4656 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4657 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4658 /* 4659 if (pcbddc->dbg_flag) { 4660 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4661 } 4662 */ 4663 } else { 4664 graph = pcbddc->mat_graph; 4665 } 4666 4667 /* Create Schur complement matrix */ 4668 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4669 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4670 4671 /* sub_schurs init */ 4672 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4673 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4674 /* free graph struct */ 4675 if (pcbddc->sub_schurs_rebuild) { 4676 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4677 } 4678 PetscFunctionReturn(0); 4679 } 4680