1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 cum += subset_size*n; 66 cum2 += n; 67 maxneigs = PetscMax(maxneigs,n); 68 } 69 } 70 71 if (mss) { 72 if (symmetric) { 73 PetscBLASInt B_itype = 1; 74 PetscBLASInt B_N = mss; 75 PetscScalar zero = 0.0; 76 PetscScalar eps = 0.0; /* dlamch? */ 77 78 B_lwork = -1; 79 S = NULL; 80 St = NULL; 81 eigs = NULL; 82 eigv = NULL; 83 B_iwork = NULL; 84 B_ifail = NULL; 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 #if defined(PETSC_USE_COMPLEX) 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 88 #else 89 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 90 #endif 91 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 } else { 94 /* TODO */ 95 } 96 } else { 97 lwork = 0; 98 } 99 100 nv = 0; 101 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 102 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 103 } 104 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 105 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 106 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 107 #if defined(PETSC_USE_COMPLEX) 108 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 109 #endif 110 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 111 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 112 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 113 nv+cum,&pcbddc->adaptive_constraints_idxs, 114 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 115 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 116 117 maxneigs = 0; 118 cum = cum2 = cumarray = 0; 119 if (sub_schurs->is_Ej_com) { 120 const PetscInt *idxs; 121 122 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 123 for (cum=0;cum<nv;cum++) { 124 pcbddc->adaptive_constraints_n[cum] = 1; 125 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 126 pcbddc->adaptive_constraints_ptrs[cum] = cum; 127 pcbddc->adaptive_constraints_data[cum] = 1.0; 128 } 129 cum2 = cum; 130 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 131 } 132 133 if (mss) { /* multilevel */ 134 if (pcbddc->use_deluxe_scaling) { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 136 } else { 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 138 } 139 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 140 } 141 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt j,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 147 const PetscInt *idxs; 148 PetscScalar one = 1.0,zero=0.0; 149 PetscBLASInt B_N; 150 151 /* S should be copied since we need it for deluxe scaling */ 152 if (symmetric) { 153 PetscInt j; 154 for (j=0;j<subset_size;j++) { 155 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 156 } 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 } else { 161 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 } 164 /* is always this the right matrix? */ 165 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 167 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 168 /* St = Starray+cumarray; */ 169 170 if (symmetric) { 171 PetscBLASInt B_itype = 1; 172 PetscBLASInt B_IL = 1, B_IU; 173 PetscScalar eps = -1.0; /* dlamch? */ 174 PetscInt nmin_s; 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 nmin_s = PetscMin(nmin,B_N); 202 if (B_neigs < nmin_s) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 251 252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 253 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 #if 0 256 PetscInt ii; 257 for (ii=0;ii<B_neigs;ii++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 259 for (j=0;j<B_N;j++) { 260 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 261 } 262 } 263 #endif 264 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 265 for (j=0;j<B_neigs;j++) { 266 #if 0 267 { 268 PetscBLASInt Blas_N,Blas_one = 1.0; 269 PetscScalar norm; 270 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 272 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 273 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } else { 275 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 276 } 277 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 278 } 279 #endif 280 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 281 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 282 cum2 += subset_size; 283 } 284 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 } 286 /* shift for next computation */ 287 cumarray += subset_size*subset_size; 288 } 289 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 290 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 291 292 if (mss) { 293 if (pcbddc->use_deluxe_scaling) { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 295 } else { 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 297 } 298 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 299 } 300 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 301 #if defined(PETSC_USE_COMPLEX) 302 ierr = PetscFree(rwork);CHKERRQ(ierr); 303 #endif 304 if (pcbddc->dbg_flag) { 305 PetscInt maxneigs_r; 306 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 307 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "PCBDDCSetUpSolvers" 314 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 315 { 316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 317 PetscScalar *coarse_submat_vals; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 /* Setup local scatters R_to_B and (optionally) R_to_D */ 322 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 323 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 324 325 /* Setup local neumann solver ksp_R */ 326 /* PCBDDCSetUpLocalScatters should be called first! */ 327 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 328 329 /* Change global null space passed in by the user if change of basis has been requested */ 330 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 331 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 332 } 333 334 /* 335 Setup local correction and local part of coarse basis. 336 Gives back the dense local part of the coarse matrix in column major ordering 337 */ 338 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 339 340 /* Compute total number of coarse nodes and setup coarse solver */ 341 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 342 343 /* free */ 344 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "PCBDDCResetCustomization" 350 PetscErrorCode PCBDDCResetCustomization(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 358 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 361 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 362 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 363 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 364 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetTopography" 372 PetscErrorCode PCBDDCResetTopography(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 380 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 381 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCBDDCResetSolvers" 388 PetscErrorCode PCBDDCResetSolvers(PC pc) 389 { 390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 391 PetscScalar *array; 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 396 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 /* Get submatrices from subdomain matrix */ 652 if (n_vertices) { 653 Mat newmat; 654 IS is_aux; 655 PetscInt ibs,mbs; 656 PetscBool issbaij; 657 658 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 659 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 660 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 661 if (ibs != mbs) { /* need to convert to SEQAIJ */ 662 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 /* this is safe */ 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 671 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 672 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 673 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 674 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 676 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 677 } else { 678 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 679 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 680 } 681 } 682 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 683 } 684 685 /* Matrix of coarse basis functions (local) */ 686 if (pcbddc->coarse_phi_B) { 687 PetscInt on_B,on_primal,on_D=n_D; 688 if (pcbddc->coarse_phi_D) { 689 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 690 } 691 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 692 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 693 PetscScalar *marray; 694 695 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 696 ierr = PetscFree(marray);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 701 } 702 } 703 704 if (!pcbddc->coarse_phi_B) { 705 PetscScalar *marray; 706 707 n = n_B*pcbddc->local_primal_size; 708 if (pcbddc->switch_static || pcbddc->dbg_flag) { 709 n += n_D*pcbddc->local_primal_size; 710 } 711 if (!pcbddc->issym) { 712 n *= 2; 713 } 714 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 716 n = n_B*pcbddc->local_primal_size; 717 if (pcbddc->switch_static || pcbddc->dbg_flag) { 718 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 n += n_D*pcbddc->local_primal_size; 720 } 721 if (!pcbddc->issym) { 722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 if (pcbddc->switch_static || pcbddc->dbg_flag) { 724 n = n_B*pcbddc->local_primal_size; 725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 729 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 730 if (pcbddc->switch_static || pcbddc->dbg_flag) { 731 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 732 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 733 } 734 } 735 } 736 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 737 /* vertices */ 738 if (n_vertices) { 739 740 if (n_R) { 741 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 742 PetscBLASInt B_N,B_one = 1; 743 PetscScalar *x,*y; 744 745 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 747 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 748 if (F) { 749 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 750 } else { 751 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 752 for (i=0;i<n_vertices;i++) { 753 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 754 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 755 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 757 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 758 } 759 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 760 } 761 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 762 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 763 if (n_constraints) { 764 Mat B; 765 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 766 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 767 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 768 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 769 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 770 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 771 ierr = MatDestroy(&B);CHKERRQ(ierr); 772 } 773 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 774 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 775 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 776 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 778 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 779 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 780 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 781 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 782 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 783 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 784 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 785 } else { 786 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 787 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 788 } 789 /* coarse basis functions */ 790 for (i=0;i<n_vertices;i++) { 791 PetscScalar *y; 792 793 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 794 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 795 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 796 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 y[n_B*i+idx_V_B[i]] = 1.0; 799 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 800 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 801 802 if (pcbddc->switch_static || pcbddc->dbg_flag) { 803 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 804 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 805 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 809 } 810 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 811 } 812 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 813 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 814 } 815 816 if (n_constraints) { 817 Mat B; 818 819 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 821 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 822 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 823 if (n_vertices) { 824 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 825 } 826 ierr = MatDestroy(&B);CHKERRQ(ierr); 827 /* coarse basis functions */ 828 for (i=0;i<n_constraints;i++) { 829 PetscScalar *y; 830 831 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 832 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 833 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 834 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 837 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 838 839 if (pcbddc->switch_static || pcbddc->dbg_flag) { 840 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 842 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 844 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 845 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 846 } 847 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 848 } 849 } 850 851 /* compute other basis functions for non-symmetric problems */ 852 if (!pcbddc->issym) { 853 Mat B,X; 854 855 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 856 857 if (n_constraints) { 858 Mat S_CCT,B_C; 859 860 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 861 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 862 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 863 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 864 if (n_vertices) { 865 Mat B_V,S_VCT; 866 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 868 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 869 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 871 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 872 } 873 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 874 } 875 if (n_vertices && n_R) { 876 Mat A_VRT; 877 PetscBLASInt B_N,B_one = 1; 878 879 if (!n_constraints) { /* if there are no constraints, reset work */ 880 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 883 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 884 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 885 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 886 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 887 } 888 889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 890 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 891 for (i=0;i<pcbddc->local_primal_size;i++) { 892 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 893 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 894 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 896 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 897 } 898 } else { 899 for (i=0;i<pcbddc->local_primal_size;i++) { 900 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 901 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 902 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 904 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 905 } 906 } 907 ierr = MatDestroy(&B);CHKERRQ(ierr); 908 /* coarse basis functions */ 909 for (i=0;i<pcbddc->local_primal_size;i++) { 910 PetscScalar *y; 911 912 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 913 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 914 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 915 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 if (i<n_vertices) { 918 y[n_B*i+idx_V_B[i]] = 1.0; 919 } 920 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 921 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 922 923 if (pcbddc->switch_static || pcbddc->dbg_flag) { 924 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 926 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 929 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 930 } 931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 932 } 933 ierr = MatDestroy(&X);CHKERRQ(ierr); 934 } 935 936 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 940 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 941 /* Checking coarse_sub_mat and coarse basis functios */ 942 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 944 if (pcbddc->dbg_flag) { 945 Mat coarse_sub_mat; 946 Mat AUXMAT,TM1,TM2,TM3,TM4; 947 Mat coarse_phi_D,coarse_phi_B; 948 Mat coarse_psi_D,coarse_psi_B; 949 Mat A_II,A_BB,A_IB,A_BI; 950 MatType checkmattype=MATSEQAIJ; 951 PetscReal real_value; 952 953 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 956 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 958 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 959 if (unsymmetric_check) { 960 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 961 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 962 } 963 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 964 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 966 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 968 if (unsymmetric_check) { 969 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 970 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 971 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 972 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 973 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 974 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 975 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 976 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 977 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 978 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 979 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 980 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 981 } else { 982 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 983 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 984 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 985 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 986 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 987 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 988 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 989 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 990 } 991 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 994 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 995 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 996 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 997 998 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 999 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1004 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1010 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1011 if (unsymmetric_check) { 1012 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1013 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1014 } 1015 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1016 } 1017 1018 /* free memory */ 1019 ierr = PetscFree(work);CHKERRQ(ierr); 1020 if (n_vertices) { 1021 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1022 } 1023 if (n_constraints) { 1024 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1025 } 1026 /* get back data */ 1027 *coarse_submat_vals_n = coarse_submat_vals; 1028 PetscFunctionReturn(0); 1029 } 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1034 { 1035 Mat *work_mat; 1036 IS isrow_s,iscol_s; 1037 PetscBool rsorted,csorted; 1038 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1039 PetscErrorCode ierr; 1040 1041 PetscFunctionBegin; 1042 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1043 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1045 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1046 1047 if (!rsorted) { 1048 const PetscInt *idxs; 1049 PetscInt *idxs_sorted,i; 1050 1051 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1052 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1053 for (i=0;i<rsize;i++) { 1054 idxs_perm_r[i] = i; 1055 } 1056 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1057 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1058 for (i=0;i<rsize;i++) { 1059 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1060 } 1061 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1063 } else { 1064 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1065 isrow_s = isrow; 1066 } 1067 1068 if (!csorted) { 1069 if (isrow == iscol) { 1070 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1071 iscol_s = isrow_s; 1072 } else { 1073 const PetscInt *idxs; 1074 PetscInt *idxs_sorted,i; 1075 1076 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1077 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1078 for (i=0;i<csize;i++) { 1079 idxs_perm_c[i] = i; 1080 } 1081 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1082 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1083 for (i=0;i<csize;i++) { 1084 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1085 } 1086 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1087 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1088 } 1089 } else { 1090 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1091 iscol_s = iscol; 1092 } 1093 1094 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1095 1096 if (!rsorted || !csorted) { 1097 Mat new_mat; 1098 IS is_perm_r,is_perm_c; 1099 1100 if (!rsorted) { 1101 PetscInt *idxs_r,i; 1102 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1103 for (i=0;i<rsize;i++) { 1104 idxs_r[idxs_perm_r[i]] = i; 1105 } 1106 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1108 } else { 1109 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1110 } 1111 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1112 1113 if (!csorted) { 1114 if (isrow_s == iscol_s) { 1115 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1116 is_perm_c = is_perm_r; 1117 } else { 1118 PetscInt *idxs_c,i; 1119 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1120 for (i=0;i<csize;i++) { 1121 idxs_c[idxs_perm_c[i]] = i; 1122 } 1123 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1125 } 1126 } else { 1127 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1128 } 1129 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1130 1131 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1132 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1133 work_mat[0] = new_mat; 1134 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1135 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1136 } 1137 1138 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1139 *B = work_mat[0]; 1140 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1141 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1142 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1149 { 1150 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1152 Mat new_mat; 1153 IS is_local,is_global; 1154 PetscInt local_size; 1155 PetscBool isseqaij; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1160 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1161 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1162 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1163 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1164 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1166 1167 /* check */ 1168 if (pcbddc->dbg_flag) { 1169 Vec x,x_change; 1170 PetscReal error; 1171 1172 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1173 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1174 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1175 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1177 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1178 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1180 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1181 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1183 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x);CHKERRQ(ierr); 1185 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1186 } 1187 1188 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1189 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1190 if (isseqaij) { 1191 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1192 } else { 1193 Mat work_mat; 1194 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1195 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1197 } 1198 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1199 /* 1200 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1201 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1202 */ 1203 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1210 { 1211 PC_IS* pcis = (PC_IS*)(pc->data); 1212 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1213 IS is_aux1,is_aux2; 1214 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1215 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1216 PetscInt vbs,bs; 1217 PetscBT bitmask; 1218 PetscErrorCode ierr; 1219 1220 PetscFunctionBegin; 1221 /* 1222 No need to setup local scatters if 1223 - primal space is unchanged 1224 AND 1225 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1226 AND 1227 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1228 */ 1229 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1230 PetscFunctionReturn(0); 1231 } 1232 /* destroy old objects */ 1233 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1235 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1236 /* Set Non-overlapping dimensions */ 1237 n_B = pcis->n_B; n_D = pcis->n - n_B; 1238 n_vertices = pcbddc->n_actual_vertices; 1239 /* create auxiliary bitmask */ 1240 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1241 for (i=0;i<n_vertices;i++) { 1242 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1243 } 1244 1245 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1246 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1247 for (i=0, n_R=0; i<pcis->n; i++) { 1248 if (!PetscBTLookup(bitmask,i)) { 1249 idx_R_local[n_R] = i; 1250 n_R++; 1251 } 1252 } 1253 1254 /* Block code */ 1255 vbs = 1; 1256 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1257 if (bs>1 && !(n_vertices%bs)) { 1258 PetscBool is_blocked = PETSC_TRUE; 1259 PetscInt *vary; 1260 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1261 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1262 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1263 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1264 for (i=0; i<n_vertices; i++) { 1265 if (vary[i]!=0 && vary[i]!=bs) { 1266 is_blocked = PETSC_FALSE; 1267 break; 1268 } 1269 } 1270 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1271 vbs = bs; 1272 for (i=0;i<n_R/vbs;i++) { 1273 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1274 } 1275 } 1276 ierr = PetscFree(vary);CHKERRQ(ierr); 1277 } 1278 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1279 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1280 1281 /* print some info if requested */ 1282 if (pcbddc->dbg_flag) { 1283 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1284 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1290 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1291 } 1292 1293 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1294 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1296 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1297 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1298 for (i=0; i<n_D; i++) { 1299 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1300 } 1301 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1302 for (i=0, j=0; i<n_R; i++) { 1303 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1304 aux_array1[j++] = i; 1305 } 1306 } 1307 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1308 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1309 for (i=0, j=0; i<n_B; i++) { 1310 if (!PetscBTLookup(bitmask,is_indices[i])) { 1311 aux_array2[j++] = i; 1312 } 1313 } 1314 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1315 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1316 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1318 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1319 1320 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1321 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1322 for (i=0, j=0; i<n_R; i++) { 1323 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1324 aux_array1[j++] = i; 1325 } 1326 } 1327 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1328 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1329 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1330 } 1331 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1332 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1340 { 1341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1342 PC_IS *pcis = (PC_IS*)pc->data; 1343 PC pc_temp; 1344 Mat A_RR; 1345 MatReuse reuse; 1346 PetscScalar m_one = -1.0; 1347 PetscReal value; 1348 PetscInt n_D,n_R,ibs,mbs; 1349 PetscBool use_exact,use_exact_reduced,issbaij; 1350 PetscErrorCode ierr; 1351 /* prefixes stuff */ 1352 char dir_prefix[256],neu_prefix[256],str_level[16]; 1353 size_t len; 1354 1355 PetscFunctionBegin; 1356 1357 /* compute prefixes */ 1358 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1359 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1360 if (!pcbddc->current_level) { 1361 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1363 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1364 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1365 } else { 1366 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1367 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1368 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1369 len -= 15; /* remove "pc_bddc_coarse_" */ 1370 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1371 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1372 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1374 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1376 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1377 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1378 } 1379 1380 /* DIRICHLET PROBLEM */ 1381 if (dirichlet) { 1382 /* Matrix for Dirichlet problem is pcis->A_II */ 1383 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1384 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1385 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1386 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1387 /* default */ 1388 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1389 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1391 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1392 if (issbaij) { 1393 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1394 } else { 1395 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1396 } 1397 /* Allow user's customization */ 1398 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1399 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1400 } 1401 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1402 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1403 if (!n_D) { 1404 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1405 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1406 } 1407 /* Set Up KSP for Dirichlet problem of BDDC */ 1408 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1409 /* set ksp_D into pcis data */ 1410 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1411 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1412 pcis->ksp_D = pcbddc->ksp_D; 1413 } 1414 1415 /* NEUMANN PROBLEM */ 1416 A_RR = 0; 1417 if (neumann) { 1418 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1419 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1420 if (pcbddc->ksp_R) { /* already created ksp */ 1421 PetscInt nn_R; 1422 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1423 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1424 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1425 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1426 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1427 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1428 reuse = MAT_INITIAL_MATRIX; 1429 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1430 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1431 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1432 reuse = MAT_INITIAL_MATRIX; 1433 } else { /* safe to reuse the matrix */ 1434 reuse = MAT_REUSE_MATRIX; 1435 } 1436 } 1437 /* last check */ 1438 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1439 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1440 reuse = MAT_INITIAL_MATRIX; 1441 } 1442 } else { /* first time, so we need to create the matrix */ 1443 reuse = MAT_INITIAL_MATRIX; 1444 } 1445 /* extract A_RR */ 1446 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1447 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1448 if (ibs != mbs) { 1449 Mat newmat; 1450 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1451 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1452 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1453 } else { 1454 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1455 } 1456 if (!pcbddc->ksp_R) { /* create object if not present */ 1457 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1458 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1459 /* default */ 1460 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1461 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1462 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1464 if (issbaij) { 1465 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1466 } else { 1467 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1468 } 1469 /* Allow user's customization */ 1470 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1471 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1472 } 1473 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1474 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1475 if (!n_R) { 1476 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1477 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1478 } 1479 /* Set Up KSP for Neumann problem of BDDC */ 1480 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1481 } 1482 1483 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1484 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1485 if (pcbddc->dbg_flag) { 1486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1489 } 1490 if (dirichlet) { /* Dirichlet */ 1491 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1492 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1495 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1496 /* need to be adapted? */ 1497 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1498 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1499 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1500 /* print info */ 1501 if (pcbddc->dbg_flag) { 1502 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1504 } 1505 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1506 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1507 } 1508 } 1509 if (neumann) { /* Neumann */ 1510 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1511 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1514 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1515 /* need to be adapted? */ 1516 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1517 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1518 /* print info */ 1519 if (pcbddc->dbg_flag) { 1520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1522 } 1523 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1524 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1525 } 1526 } 1527 } 1528 /* free Neumann problem's matrix */ 1529 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1535 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1536 { 1537 PetscErrorCode ierr; 1538 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1539 1540 PetscFunctionBegin; 1541 if (applytranspose) { 1542 if (pcbddc->local_auxmat1) { 1543 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1545 } 1546 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1547 } else { 1548 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1549 if (pcbddc->local_auxmat1) { 1550 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1551 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1552 } 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1560 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1561 { 1562 PetscErrorCode ierr; 1563 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1564 PC_IS* pcis = (PC_IS*) (pc->data); 1565 const PetscScalar zero = 0.0; 1566 1567 PetscFunctionBegin; 1568 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1569 if (applytranspose) { 1570 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1571 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1572 } else { 1573 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1574 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1575 } 1576 /* start communications from local primal nodes to rhs of coarse solver */ 1577 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1582 /* TODO remove null space when doing multilevel */ 1583 if (pcbddc->coarse_ksp) { 1584 if (applytranspose) { 1585 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1586 } else { 1587 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1588 } 1589 } 1590 1591 /* Local solution on R nodes */ 1592 if (pcis->n) { 1593 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1594 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1596 if (pcbddc->switch_static) { 1597 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1599 } 1600 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1601 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1602 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { 1605 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 } 1608 } 1609 1610 /* communications from coarse sol to local primal nodes */ 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 1614 /* Sum contributions from two levels */ 1615 if (applytranspose) { 1616 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1617 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1618 } else { 1619 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1620 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1621 } 1622 PetscFunctionReturn(0); 1623 } 1624 1625 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1628 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1629 { 1630 PetscErrorCode ierr; 1631 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1632 PetscScalar *array,*array2; 1633 Vec from,to; 1634 1635 PetscFunctionBegin; 1636 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1637 from = pcbddc->coarse_vec; 1638 to = pcbddc->vec1_P; 1639 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1640 Vec tvec; 1641 PetscInt lsize; 1642 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1643 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1644 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1645 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1646 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1647 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1648 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1649 } 1650 } else { /* from local to global -> put data in coarse right hand side */ 1651 from = pcbddc->vec1_P; 1652 to = pcbddc->coarse_vec; 1653 } 1654 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1660 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1661 { 1662 PetscErrorCode ierr; 1663 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1664 PetscScalar *array,*array2; 1665 Vec from,to; 1666 1667 PetscFunctionBegin; 1668 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1669 from = pcbddc->coarse_vec; 1670 to = pcbddc->vec1_P; 1671 } else { /* from local to global -> put data in coarse right hand side */ 1672 from = pcbddc->vec1_P; 1673 to = pcbddc->coarse_vec; 1674 } 1675 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1676 if (smode == SCATTER_FORWARD) { 1677 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1678 Vec tvec; 1679 PetscInt lsize; 1680 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1681 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1682 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1683 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1684 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1685 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1686 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1687 } 1688 } 1689 PetscFunctionReturn(0); 1690 } 1691 1692 /* uncomment for testing purposes */ 1693 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1696 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1697 { 1698 PetscErrorCode ierr; 1699 PC_IS* pcis = (PC_IS*)(pc->data); 1700 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1701 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1702 /* one and zero */ 1703 PetscScalar one=1.0,zero=0.0; 1704 /* space to store constraints and their local indices */ 1705 PetscScalar *temp_quadrature_constraint; 1706 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1707 /* iterators */ 1708 PetscInt i,j,k,total_counts,temp_start_ptr; 1709 /* stuff to store connected components stored in pcbddc->mat_graph */ 1710 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1711 PetscInt n_ISForFaces,n_ISForEdges; 1712 /* near null space stuff */ 1713 MatNullSpace nearnullsp; 1714 const Vec *nearnullvecs; 1715 Vec *localnearnullsp; 1716 PetscBool nnsp_has_cnst; 1717 PetscInt nnsp_size; 1718 PetscScalar *array; 1719 /* BLAS integers */ 1720 PetscBLASInt lwork,lierr; 1721 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1722 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1723 /* LAPACK working arrays for SVD or POD */ 1724 PetscBool skip_lapack; 1725 PetscScalar *work; 1726 PetscReal *singular_vals; 1727 #if defined(PETSC_USE_COMPLEX) 1728 PetscReal *rwork; 1729 #endif 1730 #if defined(PETSC_MISSING_LAPACK_GESVD) 1731 PetscBLASInt Blas_one_2=1; 1732 PetscScalar *temp_basis,*correlation_mat; 1733 #else 1734 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1735 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1736 #endif 1737 /* reuse */ 1738 PetscInt olocal_primal_size; 1739 PetscInt *oprimal_indices_local_idxs; 1740 /* change of basis */ 1741 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1742 PetscBool boolforchange,qr_needed; 1743 PetscBT touched,change_basis,qr_needed_idx; 1744 /* auxiliary stuff */ 1745 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1746 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1747 PetscScalar *temp_quadrature_constraint_work=NULL; 1748 /* some quantities */ 1749 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1750 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1751 1752 1753 PetscFunctionBegin; 1754 /* Destroy Mat objects computed previously */ 1755 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1756 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1757 /* TODO synch with adaptive selection */ 1758 /* Get index sets for faces, edges and vertices from graph */ 1759 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1760 /* free unneeded index sets */ 1761 if (!pcbddc->use_vertices) { 1762 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1763 } 1764 if (!pcbddc->use_edges) { 1765 for (i=0;i<n_ISForEdges;i++) { 1766 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1767 } 1768 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1769 n_ISForEdges = 0; 1770 } 1771 if (!pcbddc->use_faces) { 1772 for (i=0;i<n_ISForFaces;i++) { 1773 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1774 } 1775 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1776 n_ISForFaces = 0; 1777 } 1778 /* HACKS (the following two blocks of code) */ 1779 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1780 pcbddc->use_change_of_basis = PETSC_TRUE; 1781 if (!ISForEdges) { 1782 pcbddc->use_change_on_faces = PETSC_TRUE; 1783 } 1784 } 1785 if (pcbddc->NullSpace) { 1786 /* use_change_of_basis should be consistent among processors */ 1787 PetscBool tbool[2],gbool[2]; 1788 tbool [0] = pcbddc->use_change_of_basis; 1789 tbool [1] = pcbddc->use_change_on_faces; 1790 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1791 pcbddc->use_change_of_basis = gbool[0]; 1792 pcbddc->use_change_on_faces = gbool[1]; 1793 } 1794 /* print some info */ 1795 if (pcbddc->dbg_flag) { 1796 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1797 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1798 i = 0; 1799 if (ISForVertices) { 1800 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1801 } 1802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1805 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1806 } 1807 1808 if (!pcbddc->adaptive_selection) { 1809 /* check if near null space is attached to global mat */ 1810 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1811 if (nearnullsp) { 1812 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1813 /* remove any stored info */ 1814 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1815 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1816 /* store information for BDDC solver reuse */ 1817 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1818 pcbddc->onearnullspace = nearnullsp; 1819 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1820 for (i=0;i<nnsp_size;i++) { 1821 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1822 } 1823 } else { /* if near null space is not provided BDDC uses constants by default */ 1824 nnsp_size = 0; 1825 nnsp_has_cnst = PETSC_TRUE; 1826 } 1827 /* get max number of constraints on a single cc */ 1828 max_constraints = nnsp_size; 1829 if (nnsp_has_cnst) max_constraints++; 1830 1831 /* 1832 Evaluate maximum storage size needed by the procedure 1833 - temp_indices will contain start index of each constraint stored as follows 1834 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1835 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1836 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1837 */ 1838 total_counts = n_ISForFaces+n_ISForEdges; 1839 total_counts *= max_constraints; 1840 n_vertices = 0; 1841 if (ISForVertices) { 1842 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1843 } 1844 total_counts += n_vertices; 1845 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1846 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1847 total_counts = 0; 1848 max_size_of_constraint = 0; 1849 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1850 if (i<n_ISForEdges) { 1851 used_IS = &ISForEdges[i]; 1852 } else { 1853 used_IS = &ISForFaces[i-n_ISForEdges]; 1854 } 1855 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1856 total_counts += j; 1857 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1858 } 1859 total_counts *= max_constraints; 1860 total_counts += n_vertices; 1861 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1862 /* get local part of global near null space vectors */ 1863 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1864 for (k=0;k<nnsp_size;k++) { 1865 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1866 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 } 1869 1870 /* whether or not to skip lapack calls */ 1871 skip_lapack = PETSC_TRUE; 1872 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1873 1874 /* allocate some auxiliary stuff */ 1875 if (!skip_lapack || pcbddc->use_qr_single) { 1876 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1877 } 1878 1879 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1880 if (!skip_lapack) { 1881 PetscScalar temp_work; 1882 1883 #if defined(PETSC_MISSING_LAPACK_GESVD) 1884 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1885 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1886 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1888 #if defined(PETSC_USE_COMPLEX) 1889 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1890 #endif 1891 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1892 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1893 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1894 lwork = -1; 1895 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1896 #if !defined(PETSC_USE_COMPLEX) 1897 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1898 #else 1899 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1900 #endif 1901 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1902 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1903 #else /* on missing GESVD */ 1904 /* SVD */ 1905 PetscInt max_n,min_n; 1906 max_n = max_size_of_constraint; 1907 min_n = max_constraints; 1908 if (max_size_of_constraint < max_constraints) { 1909 min_n = max_size_of_constraint; 1910 max_n = max_constraints; 1911 } 1912 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1913 #if defined(PETSC_USE_COMPLEX) 1914 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1915 #endif 1916 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1917 lwork = -1; 1918 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1920 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1921 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1922 #if !defined(PETSC_USE_COMPLEX) 1923 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1924 #else 1925 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1926 #endif 1927 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1928 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1929 #endif /* on missing GESVD */ 1930 /* Allocate optimal workspace */ 1931 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1932 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1933 } 1934 /* Now we can loop on constraining sets */ 1935 total_counts = 0; 1936 temp_indices[0] = 0; 1937 /* vertices */ 1938 if (ISForVertices) { 1939 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1940 if (nnsp_has_cnst) { /* consider all vertices */ 1941 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1942 for (i=0;i<n_vertices;i++) { 1943 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1944 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1945 total_counts++; 1946 } 1947 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1948 PetscBool used_vertex; 1949 for (i=0;i<n_vertices;i++) { 1950 used_vertex = PETSC_FALSE; 1951 k = 0; 1952 while (!used_vertex && k<nnsp_size) { 1953 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1954 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1955 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1956 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1957 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1958 total_counts++; 1959 used_vertex = PETSC_TRUE; 1960 } 1961 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1962 k++; 1963 } 1964 } 1965 } 1966 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1967 n_vertices = total_counts; 1968 } 1969 1970 /* edges and faces */ 1971 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1972 if (ncc<n_ISForEdges) { 1973 used_IS = &ISForEdges[ncc]; 1974 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1975 } else { 1976 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1977 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1978 } 1979 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1980 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1981 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1982 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1983 /* change of basis should not be performed on local periodic nodes */ 1984 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1985 if (nnsp_has_cnst) { 1986 PetscScalar quad_value; 1987 temp_constraints++; 1988 if (!pcbddc->use_nnsp_true) { 1989 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1990 } else { 1991 quad_value = 1.0; 1992 } 1993 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1994 for (j=0;j<size_of_constraint;j++) { 1995 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1996 } 1997 /* sort by global ordering if using lapack subroutines (not needed!) */ 1998 if (!skip_lapack || pcbddc->use_qr_single) { 1999 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2000 for (j=0;j<size_of_constraint;j++) { 2001 permutation[j]=j; 2002 } 2003 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2004 for (j=0;j<size_of_constraint;j++) { 2005 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2006 } 2007 for (j=0;j<size_of_constraint;j++) { 2008 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2009 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2010 } 2011 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2012 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2013 } 2014 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2015 total_counts++; 2016 } 2017 for (k=0;k<nnsp_size;k++) { 2018 PetscReal real_value; 2019 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2020 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2021 for (j=0;j<size_of_constraint;j++) { 2022 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2023 } 2024 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2025 /* check if array is null on the connected component */ 2026 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2027 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2028 if (real_value > 0.0) { /* keep indices and values */ 2029 /* sort by global ordering if using lapack subroutines */ 2030 if (!skip_lapack || pcbddc->use_qr_single) { 2031 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2032 for (j=0;j<size_of_constraint;j++) { 2033 permutation[j]=j; 2034 } 2035 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2036 for (j=0;j<size_of_constraint;j++) { 2037 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2038 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2039 } 2040 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2041 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2042 } 2043 temp_constraints++; 2044 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2045 total_counts++; 2046 } 2047 } 2048 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2049 valid_constraints = temp_constraints; 2050 if (!pcbddc->use_nnsp_true && temp_constraints) { 2051 if (temp_constraints == 1) { /* just normalize the constraint */ 2052 PetscScalar norm; 2053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2054 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2055 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2056 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2057 } else { /* perform SVD */ 2058 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2059 2060 #if defined(PETSC_MISSING_LAPACK_GESVD) 2061 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2062 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2063 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2064 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2065 from that computed using LAPACKgesvd 2066 -> This is due to a different computation of eigenvectors in LAPACKheev 2067 -> The quality of the POD-computed basis will be the same */ 2068 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2069 /* Store upper triangular part of correlation matrix */ 2070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2071 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2072 for (j=0;j<temp_constraints;j++) { 2073 for (k=0;k<j+1;k++) { 2074 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2075 } 2076 } 2077 /* compute eigenvalues and eigenvectors of correlation matrix */ 2078 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2079 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2080 #if !defined(PETSC_USE_COMPLEX) 2081 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2082 #else 2083 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2084 #endif 2085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2086 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2087 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2088 j = 0; 2089 while (j < temp_constraints && singular_vals[j] < tol) j++; 2090 total_counts = total_counts-j; 2091 valid_constraints = temp_constraints-j; 2092 /* scale and copy POD basis into used quadrature memory */ 2093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2098 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2099 if (j<temp_constraints) { 2100 PetscInt ii; 2101 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2103 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2104 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2105 for (k=0;k<temp_constraints-j;k++) { 2106 for (ii=0;ii<size_of_constraint;ii++) { 2107 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2108 } 2109 } 2110 } 2111 #else /* on missing GESVD */ 2112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2115 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2116 #if !defined(PETSC_USE_COMPLEX) 2117 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2118 #else 2119 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2120 #endif 2121 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2123 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2124 k = temp_constraints; 2125 if (k > size_of_constraint) k = size_of_constraint; 2126 j = 0; 2127 while (j < k && singular_vals[k-j-1] < tol) j++; 2128 valid_constraints = k-j; 2129 total_counts = total_counts-temp_constraints+valid_constraints; 2130 #endif /* on missing GESVD */ 2131 } 2132 } 2133 /* setting change_of_basis flag is safe now */ 2134 if (boolforchange) { 2135 for (j=0;j<valid_constraints;j++) { 2136 PetscBTSet(change_basis,total_counts-j-1); 2137 } 2138 } 2139 } 2140 /* free workspace */ 2141 if (!skip_lapack || pcbddc->use_qr_single) { 2142 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2143 } 2144 if (!skip_lapack) { 2145 ierr = PetscFree(work);CHKERRQ(ierr); 2146 #if defined(PETSC_USE_COMPLEX) 2147 ierr = PetscFree(rwork);CHKERRQ(ierr); 2148 #endif 2149 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2150 #if defined(PETSC_MISSING_LAPACK_GESVD) 2151 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2152 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2153 #endif 2154 } 2155 for (k=0;k<nnsp_size;k++) { 2156 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2157 } 2158 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2159 } else { 2160 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2161 PetscInt cum = 0; 2162 2163 total_counts = 0; 2164 n_vertices = 0; 2165 if (sub_schurs->is_Ej_com) { 2166 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2167 } 2168 max_constraints = 0; 2169 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2170 total_counts += pcbddc->adaptive_constraints_n[i]; 2171 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2172 } 2173 temp_indices = pcbddc->adaptive_constraints_ptrs; 2174 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2175 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2176 2177 #if 0 2178 printf("Found %d totals\n",total_counts); 2179 for (i=0;i<total_counts;i++) { 2180 printf("const %d, start %d",i,temp_indices[i]); 2181 printf(" end %d:\n",temp_indices[i+1]); 2182 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2183 printf(" idxs %d",temp_indices_to_constraint[j]); 2184 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2185 } 2186 } 2187 for (i=0;i<n_vertices;i++) { 2188 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2189 } 2190 for (i=0;i<sub_schurs->n_subs;i++) { 2191 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2192 } 2193 #endif 2194 2195 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2196 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2197 /* Change of basis */ 2198 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2199 if (pcbddc->use_change_of_basis) { 2200 cum = n_vertices; 2201 for (i=0;i<sub_schurs->n_subs;i++) { 2202 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2203 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2204 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2205 } 2206 } 2207 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2208 } 2209 } 2210 } 2211 2212 /* free index sets of faces, edges and vertices */ 2213 for (i=0;i<n_ISForFaces;i++) { 2214 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2215 } 2216 if (n_ISForFaces) { 2217 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2218 } 2219 for (i=0;i<n_ISForEdges;i++) { 2220 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2221 } 2222 if (n_ISForEdges) { 2223 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2224 } 2225 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2226 2227 /* map temp_indices_to_constraint in boundary numbering */ 2228 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2229 if (i != temp_indices[total_counts]) { 2230 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2231 } 2232 2233 /* set quantities in pcbddc data structure and store previous primal size */ 2234 /* n_vertices defines the number of subdomain corners in the primal space */ 2235 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2236 olocal_primal_size = pcbddc->local_primal_size; 2237 pcbddc->local_primal_size = total_counts; 2238 pcbddc->n_vertices = n_vertices; 2239 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2240 2241 /* Create constraint matrix */ 2242 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2243 /* so we need to set it up properly either with or without change of basis */ 2244 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2245 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2246 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2247 /* array to compute a local numbering of constraints : vertices first then constraints */ 2248 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2249 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2250 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2251 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2252 /* auxiliary stuff for basis change */ 2253 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2254 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2255 2256 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2257 total_primal_vertices=0; 2258 for (i=0;i<pcbddc->local_primal_size;i++) { 2259 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2260 if (size_of_constraint == 1) { 2261 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2262 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2263 aux_primal_minloc[total_primal_vertices]=0; 2264 total_primal_vertices++; 2265 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2266 PetscInt min_loc,min_index; 2267 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2268 /* find first untouched local node */ 2269 k = 0; 2270 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2271 min_index = global_indices[k]; 2272 min_loc = k; 2273 /* search the minimum among global nodes already untouched on the cc */ 2274 for (k=1;k<size_of_constraint;k++) { 2275 /* there can be more than one constraint on a single connected component */ 2276 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2277 min_index = global_indices[k]; 2278 min_loc = k; 2279 } 2280 } 2281 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2282 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2283 aux_primal_minloc[total_primal_vertices]=min_loc; 2284 total_primal_vertices++; 2285 } 2286 } 2287 /* determine if a QR strategy is needed for change of basis */ 2288 qr_needed = PETSC_FALSE; 2289 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2290 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2291 if (PetscBTLookup(change_basis,i)) { 2292 if (!pcbddc->use_qr_single) { 2293 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2294 j = 0; 2295 for (k=0;k<size_of_constraint;k++) { 2296 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2297 j++; 2298 } 2299 } 2300 /* found more than one primal dof on the cc */ 2301 if (j > 1) { 2302 PetscBTSet(qr_needed_idx,i); 2303 qr_needed = PETSC_TRUE; 2304 } 2305 } else { 2306 PetscBTSet(qr_needed_idx,i); 2307 qr_needed = PETSC_TRUE; 2308 } 2309 } 2310 } 2311 /* free workspace */ 2312 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2313 2314 /* permute indices in order to have a sorted set of vertices */ 2315 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2316 2317 /* nonzero structure of constraint matrix */ 2318 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2319 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2320 j=total_primal_vertices; 2321 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2322 if (!PetscBTLookup(change_basis,i)) { 2323 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2324 j++; 2325 } 2326 } 2327 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2328 ierr = PetscFree(nnz);CHKERRQ(ierr); 2329 /* set values in constraint matrix */ 2330 for (i=0;i<total_primal_vertices;i++) { 2331 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2332 } 2333 total_counts = total_primal_vertices; 2334 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2335 if (!PetscBTLookup(change_basis,i)) { 2336 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2337 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2338 total_counts++; 2339 } 2340 } 2341 /* assembling */ 2342 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 /* 2345 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2346 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2347 */ 2348 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2349 if (pcbddc->use_change_of_basis) { 2350 /* dual and primal dofs on a single cc */ 2351 PetscInt dual_dofs,primal_dofs; 2352 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2353 PetscInt primal_counter; 2354 /* working stuff for GEQRF */ 2355 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2356 PetscBLASInt lqr_work; 2357 /* working stuff for UNGQR */ 2358 PetscScalar *gqr_work,lgqr_work_t; 2359 PetscBLASInt lgqr_work; 2360 /* working stuff for TRTRS */ 2361 PetscScalar *trs_rhs; 2362 PetscBLASInt Blas_NRHS; 2363 /* pointers for values insertion into change of basis matrix */ 2364 PetscInt *start_rows,*start_cols; 2365 PetscScalar *start_vals; 2366 /* working stuff for values insertion */ 2367 PetscBT is_primal; 2368 /* matrix sizes */ 2369 PetscInt global_size,local_size; 2370 /* temporary change of basis */ 2371 Mat localChangeOfBasisMatrix; 2372 2373 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2374 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2375 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2376 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2377 /* nonzeros for local mat */ 2378 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2379 for (i=0;i<pcis->n;i++) nnz[i]=1; 2380 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2381 if (PetscBTLookup(change_basis,i)) { 2382 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2383 if (PetscBTLookup(qr_needed_idx,i)) { 2384 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2385 } else { 2386 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2387 /* get local primal index on the cc */ 2388 j = 0; 2389 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2390 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2391 } 2392 } 2393 } 2394 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2395 ierr = PetscFree(nnz);CHKERRQ(ierr); 2396 /* Set initial identity in the matrix */ 2397 for (i=0;i<pcis->n;i++) { 2398 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2399 } 2400 2401 if (pcbddc->dbg_flag) { 2402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2404 } 2405 2406 2407 /* Now we loop on the constraints which need a change of basis */ 2408 /* 2409 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2410 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2411 2412 Basic blocks of change of basis matrix T computed by 2413 2414 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2415 2416 | 1 0 ... 0 s_1/S | 2417 | 0 1 ... 0 s_2/S | 2418 | ... | 2419 | 0 ... 1 s_{n-1}/S | 2420 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2421 2422 with S = \sum_{i=1}^n s_i^2 2423 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2424 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2425 2426 - QR decomposition of constraints otherwise 2427 */ 2428 if (qr_needed) { 2429 /* space to store Q */ 2430 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2431 /* first we issue queries for optimal work */ 2432 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2433 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2434 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2435 lqr_work = -1; 2436 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2437 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2438 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2439 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2440 lgqr_work = -1; 2441 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2443 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2444 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2445 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2446 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2447 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2448 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2449 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2450 /* array to store scaling factors for reflectors */ 2451 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2452 /* array to store rhs and solution of triangular solver */ 2453 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2454 /* allocating workspace for check */ 2455 if (pcbddc->dbg_flag) { 2456 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2457 } 2458 } 2459 /* array to store whether a node is primal or not */ 2460 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2461 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2462 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2463 if (i != total_primal_vertices) { 2464 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2465 } 2466 for (i=0;i<total_primal_vertices;i++) { 2467 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2468 } 2469 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2470 2471 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2472 /* -> using implicit ordering contained in temp_indices data */ 2473 total_counts = pcbddc->n_vertices; 2474 primal_counter = total_counts; 2475 while (total_counts<pcbddc->local_primal_size) { 2476 primal_dofs = 1; 2477 if (PetscBTLookup(change_basis,total_counts)) { 2478 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2479 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2480 primal_dofs++; 2481 } 2482 /* get constraint info */ 2483 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2484 dual_dofs = size_of_constraint-primal_dofs; 2485 2486 if (pcbddc->dbg_flag) { 2487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2488 } 2489 2490 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2491 2492 /* copy quadrature constraints for change of basis check */ 2493 if (pcbddc->dbg_flag) { 2494 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2495 } 2496 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2497 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2498 2499 /* compute QR decomposition of constraints */ 2500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2504 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2505 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2506 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2507 2508 /* explictly compute R^-T */ 2509 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2510 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2511 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2514 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2516 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2517 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2519 2520 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2523 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2526 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2527 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2528 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2529 2530 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2531 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2532 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2533 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2539 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2540 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2543 2544 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2545 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2546 /* insert cols for primal dofs */ 2547 for (j=0;j<primal_dofs;j++) { 2548 start_vals = &qr_basis[j*size_of_constraint]; 2549 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2550 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2551 } 2552 /* insert cols for dual dofs */ 2553 for (j=0,k=0;j<dual_dofs;k++) { 2554 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2555 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2556 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2557 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2558 j++; 2559 } 2560 } 2561 2562 /* check change of basis */ 2563 if (pcbddc->dbg_flag) { 2564 PetscInt ii,jj; 2565 PetscBool valid_qr=PETSC_TRUE; 2566 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2573 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2574 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2575 for (jj=0;jj<size_of_constraint;jj++) { 2576 for (ii=0;ii<primal_dofs;ii++) { 2577 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2578 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2579 } 2580 } 2581 if (!valid_qr) { 2582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2583 for (jj=0;jj<size_of_constraint;jj++) { 2584 for (ii=0;ii<primal_dofs;ii++) { 2585 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2586 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2587 } 2588 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2590 } 2591 } 2592 } 2593 } else { 2594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2595 } 2596 } 2597 } else { /* simple transformation block */ 2598 PetscInt row,col; 2599 PetscScalar val,norm; 2600 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2602 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2603 for (j=0;j<size_of_constraint;j++) { 2604 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2605 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2606 if (!PetscBTLookup(is_primal,row_B)) { 2607 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2608 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2609 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2610 } else { 2611 for (k=0;k<size_of_constraint;k++) { 2612 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2613 if (row != col) { 2614 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2615 } else { 2616 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2617 } 2618 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2619 } 2620 } 2621 } 2622 if (pcbddc->dbg_flag) { 2623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2624 } 2625 } 2626 /* increment primal counter */ 2627 primal_counter += primal_dofs; 2628 } else { 2629 if (pcbddc->dbg_flag) { 2630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2631 } 2632 } 2633 /* increment constraint counter total_counts */ 2634 total_counts += primal_dofs; 2635 } 2636 2637 /* free workspace */ 2638 if (qr_needed) { 2639 if (pcbddc->dbg_flag) { 2640 ierr = PetscFree(work);CHKERRQ(ierr); 2641 } 2642 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2643 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2644 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2645 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2646 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2647 } 2648 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2649 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2651 2652 /* assembling of global change of variable */ 2653 { 2654 Mat tmat; 2655 PetscInt bs; 2656 2657 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2658 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2659 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2660 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2661 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2662 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2663 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2664 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2665 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2666 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2667 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2668 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2669 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2670 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2671 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2672 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2673 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2674 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2675 } 2676 /* check */ 2677 if (pcbddc->dbg_flag) { 2678 PetscReal error; 2679 Vec x,x_change; 2680 2681 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2682 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2683 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2684 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2685 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2686 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2687 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2688 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2689 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2690 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2691 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2692 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2695 ierr = VecDestroy(&x);CHKERRQ(ierr); 2696 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2697 } 2698 2699 /* adapt sub_schurs computed (if any) */ 2700 if (pcbddc->use_deluxe_scaling) { 2701 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2702 if (sub_schurs->n_subs_par_g) { 2703 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2704 } 2705 if (sub_schurs->S_Ej_all) { 2706 Mat S_1,S_2,tmat; 2707 IS is_all_N; 2708 2709 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2710 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2711 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2712 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2713 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2714 sub_schurs->S_Ej_all = S_1; 2715 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2716 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2717 sub_schurs->sum_S_Ej_all = S_2; 2718 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2719 } 2720 } 2721 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2722 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2723 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2724 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2725 } 2726 2727 /* set up change of basis context */ 2728 if (pcbddc->ChangeOfBasisMatrix) { 2729 PCBDDCChange_ctx change_ctx; 2730 2731 if (!pcbddc->new_global_mat) { 2732 PetscInt global_size,local_size; 2733 2734 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2735 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2736 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2737 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2738 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2739 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2740 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2741 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2742 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2743 } else { 2744 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2745 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2746 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2747 } 2748 if (!pcbddc->user_ChangeOfBasisMatrix) { 2749 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2750 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2751 } else { 2752 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2753 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2754 } 2755 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2756 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2757 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 } 2760 2761 /* get indices in local ordering for vertices and constraints */ 2762 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2763 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2764 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2765 } 2766 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2767 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2768 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2769 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2770 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2771 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2772 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2773 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2774 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2775 /* set quantities in PCBDDC data struct */ 2776 pcbddc->n_actual_vertices = i; 2777 /* check if a new primal space has been introduced */ 2778 pcbddc->new_primal_space_local = PETSC_TRUE; 2779 if (olocal_primal_size == pcbddc->local_primal_size) { 2780 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2781 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2782 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2783 } 2784 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2785 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2786 2787 /* flush dbg viewer */ 2788 if (pcbddc->dbg_flag) { 2789 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2790 } 2791 2792 /* free workspace */ 2793 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2794 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2795 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2796 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2797 if (!pcbddc->adaptive_selection) { 2798 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2799 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2800 } else { 2801 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2802 pcbddc->adaptive_constraints_ptrs, 2803 pcbddc->adaptive_constraints_idxs, 2804 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2805 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2806 } 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2812 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2813 { 2814 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2815 PC_IS *pcis = (PC_IS*)pc->data; 2816 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2817 PetscInt ierr,i,vertex_size; 2818 PetscViewer viewer=pcbddc->dbg_viewer; 2819 2820 PetscFunctionBegin; 2821 /* Reset previously computed graph */ 2822 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2823 /* Init local Graph struct */ 2824 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2825 2826 /* Check validity of the csr graph passed in by the user */ 2827 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2828 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2829 } 2830 2831 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2832 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2833 Mat mat_adj; 2834 PetscInt *xadj,*adjncy; 2835 PetscInt nvtxs; 2836 PetscBool flg_row=PETSC_TRUE; 2837 2838 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2839 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2840 if (!flg_row) { 2841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2842 } 2843 if (pcbddc->use_local_adj) { 2844 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2845 pcbddc->computed_rowadj = PETSC_TRUE; 2846 } else { /* just compute subdomain's connected components */ 2847 IS is_dummy; 2848 ISLocalToGlobalMapping l2gmap_dummy; 2849 PetscInt j,sum; 2850 PetscInt *cxadj,*cadjncy; 2851 const PetscInt *idxs; 2852 PCBDDCGraph graph; 2853 PetscBT is_on_boundary; 2854 2855 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2856 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2857 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2858 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2859 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2860 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2861 graph->xadj = xadj; 2862 graph->adjncy = adjncy; 2863 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2864 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2865 2866 if (pcbddc->dbg_flag) { 2867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2868 for (i=0;i<graph->ncc;i++) { 2869 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2870 } 2871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2872 } 2873 2874 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2875 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2876 for (i=0;i<pcis->n_B;i++) { 2877 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2878 } 2879 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2880 2881 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2882 sum = 0; 2883 for (i=0;i<graph->ncc;i++) { 2884 PetscInt sizecc = 0; 2885 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2886 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2887 sizecc++; 2888 } 2889 } 2890 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2891 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2892 cxadj[graph->queue[j]] = sizecc; 2893 } 2894 } 2895 sum += sizecc*sizecc; 2896 } 2897 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2898 sum = 0; 2899 for (i=0;i<nvtxs;i++) { 2900 PetscInt temp = cxadj[i]; 2901 cxadj[i] = sum; 2902 sum += temp; 2903 } 2904 cxadj[nvtxs] = sum; 2905 for (i=0;i<graph->ncc;i++) { 2906 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2907 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2908 PetscInt k,sizecc = 0; 2909 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2910 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2911 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2912 sizecc++; 2913 } 2914 } 2915 } 2916 } 2917 } 2918 if (nvtxs) { 2919 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2920 } else { 2921 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2922 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2923 } 2924 graph->xadj = 0; 2925 graph->adjncy = 0; 2926 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2927 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2928 } 2929 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2930 if (!flg_row) { 2931 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2932 } 2933 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2934 } 2935 2936 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2937 vertex_size = 1; 2938 if (pcbddc->user_provided_isfordofs) { 2939 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2940 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2941 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2942 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2943 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2944 } 2945 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2946 pcbddc->n_ISForDofs = 0; 2947 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2948 } 2949 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2950 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2951 } else { 2952 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2953 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2954 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2955 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2956 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2957 } 2958 } 2959 } 2960 2961 /* Setup of Graph */ 2962 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2963 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2964 } 2965 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2966 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2967 } 2968 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2969 2970 /* Graph's connected components analysis */ 2971 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2972 2973 /* print some info to stdout */ 2974 if (pcbddc->dbg_flag) { 2975 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2976 } 2977 2978 /* mark topography has done */ 2979 pcbddc->recompute_topography = PETSC_FALSE; 2980 PetscFunctionReturn(0); 2981 } 2982 2983 #undef __FUNCT__ 2984 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2985 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2986 { 2987 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2988 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2989 PetscErrorCode ierr; 2990 2991 PetscFunctionBegin; 2992 n = 0; 2993 vertices = 0; 2994 if (pcbddc->ConstraintMatrix) { 2995 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2996 for (i=0;i<local_primal_size;i++) { 2997 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2998 if (size_of_constraint == 1) n++; 2999 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3000 } 3001 if (vertices_idx) { 3002 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3003 n = 0; 3004 for (i=0;i<local_primal_size;i++) { 3005 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3006 if (size_of_constraint == 1) { 3007 vertices[n++]=row_cmat_indices[0]; 3008 } 3009 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3010 } 3011 } 3012 } 3013 *n_vertices = n; 3014 if (vertices_idx) *vertices_idx = vertices; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 #undef __FUNCT__ 3019 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3020 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3021 { 3022 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3023 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3024 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3025 PetscBT touched; 3026 PetscErrorCode ierr; 3027 3028 /* This function assumes that the number of local constraints per connected component 3029 is not greater than the number of nodes defined for the connected component 3030 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3031 PetscFunctionBegin; 3032 n = 0; 3033 constraints_index = 0; 3034 if (pcbddc->ConstraintMatrix) { 3035 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3036 max_size_of_constraint = 0; 3037 for (i=0;i<local_primal_size;i++) { 3038 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3039 if (size_of_constraint > 1) { 3040 n++; 3041 } 3042 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3043 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3044 } 3045 if (constraints_idx) { 3046 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3047 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3048 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3049 n = 0; 3050 for (i=0;i<local_primal_size;i++) { 3051 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3052 if (size_of_constraint > 1) { 3053 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3054 /* find first untouched local node */ 3055 j = 0; 3056 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3057 min_index = row_cmat_global_indices[j]; 3058 min_loc = j; 3059 /* search the minimum among nodes not yet touched on the connected component 3060 since there can be more than one constraint on a single cc */ 3061 for (j=1;j<size_of_constraint;j++) { 3062 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3063 min_index = row_cmat_global_indices[j]; 3064 min_loc = j; 3065 } 3066 } 3067 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3068 constraints_index[n++] = row_cmat_indices[min_loc]; 3069 } 3070 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3071 } 3072 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3073 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3074 } 3075 } 3076 *n_constraints = n; 3077 if (constraints_idx) *constraints_idx = constraints_index; 3078 PetscFunctionReturn(0); 3079 } 3080 3081 #undef __FUNCT__ 3082 #define __FUNCT__ "PCBDDCSubsetNumbering" 3083 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3084 { 3085 Vec local_vec,global_vec; 3086 IS seqis,paris; 3087 VecScatter scatter_ctx; 3088 PetscScalar *array; 3089 PetscInt *temp_global_dofs; 3090 PetscScalar globalsum; 3091 PetscInt i,j,s; 3092 PetscInt nlocals,first_index,old_index,max_local; 3093 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3094 PetscMPIInt *dof_sizes,*dof_displs; 3095 PetscBool first_found; 3096 PetscErrorCode ierr; 3097 3098 PetscFunctionBegin; 3099 /* mpi buffers */ 3100 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3101 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3102 j = ( !rank_prec_comm ? size_prec_comm : 0); 3103 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3104 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3105 /* get maximum size of subset */ 3106 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3107 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3108 max_local = 0; 3109 for (i=0;i<n_local_dofs;i++) { 3110 if (max_local < temp_global_dofs[i] ) { 3111 max_local = temp_global_dofs[i]; 3112 } 3113 } 3114 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3115 max_global++; 3116 max_local = 0; 3117 for (i=0;i<n_local_dofs;i++) { 3118 if (max_local < local_dofs[i] ) { 3119 max_local = local_dofs[i]; 3120 } 3121 } 3122 max_local++; 3123 /* allocate workspace */ 3124 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3125 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3126 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3127 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3128 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3129 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3130 /* create scatter */ 3131 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3132 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3133 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3134 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3135 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3136 /* init array */ 3137 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3138 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3139 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3140 if (local_dofs_mult) { 3141 for (i=0;i<n_local_dofs;i++) { 3142 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3143 } 3144 } else { 3145 for (i=0;i<n_local_dofs;i++) { 3146 array[local_dofs[i]]=1.0; 3147 } 3148 } 3149 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3150 /* scatter into global vec and get total number of global dofs */ 3151 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3152 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3153 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3154 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3155 /* Fill global_vec with cumulative function for global numbering */ 3156 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3157 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3158 nlocals = 0; 3159 first_index = -1; 3160 first_found = PETSC_FALSE; 3161 for (i=0;i<s;i++) { 3162 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3163 first_found = PETSC_TRUE; 3164 first_index = i; 3165 } 3166 nlocals += (PetscInt)PetscRealPart(array[i]); 3167 } 3168 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3169 if (!rank_prec_comm) { 3170 dof_displs[0]=0; 3171 for (i=1;i<size_prec_comm;i++) { 3172 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3173 } 3174 } 3175 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3176 if (first_found) { 3177 array[first_index] += (PetscScalar)nlocals; 3178 old_index = first_index; 3179 for (i=first_index+1;i<s;i++) { 3180 if (PetscRealPart(array[i]) > 0.1) { 3181 array[i] += array[old_index]; 3182 old_index = i; 3183 } 3184 } 3185 } 3186 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3187 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3188 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3189 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3190 /* get global ordering of local dofs */ 3191 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3192 if (local_dofs_mult) { 3193 for (i=0;i<n_local_dofs;i++) { 3194 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3195 } 3196 } else { 3197 for (i=0;i<n_local_dofs;i++) { 3198 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3199 } 3200 } 3201 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3202 /* free workspace */ 3203 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3204 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3205 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3206 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3207 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3208 /* return pointer to global ordering of local dofs */ 3209 *global_numbering_subset = temp_global_dofs; 3210 PetscFunctionReturn(0); 3211 } 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3215 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3216 { 3217 PetscInt i,j; 3218 PetscScalar *alphas; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 /* this implements stabilized Gram-Schmidt */ 3223 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3224 for (i=0;i<n;i++) { 3225 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3226 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3227 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3228 } 3229 ierr = PetscFree(alphas);CHKERRQ(ierr); 3230 PetscFunctionReturn(0); 3231 } 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "MatISGetSubassemblingPattern" 3235 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3236 { 3237 Mat subdomain_adj; 3238 IS new_ranks,ranks_send_to; 3239 MatPartitioning partitioner; 3240 Mat_IS *matis; 3241 PetscInt n_neighs,*neighs,*n_shared,**shared; 3242 PetscInt prank; 3243 PetscMPIInt size,rank,color; 3244 PetscInt *xadj,*adjncy,*oldranks; 3245 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3246 PetscInt i,local_size,threshold=0; 3247 PetscErrorCode ierr; 3248 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3249 PetscSubcomm subcomm; 3250 3251 PetscFunctionBegin; 3252 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3253 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3254 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3255 3256 /* Get info on mapping */ 3257 matis = (Mat_IS*)(mat->data); 3258 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3259 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3260 3261 /* build local CSR graph of subdomains' connectivity */ 3262 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3263 xadj[0] = 0; 3264 xadj[1] = PetscMax(n_neighs-1,0); 3265 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3266 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3267 3268 if (threshold) { 3269 PetscInt xadj_count = 0; 3270 for (i=1;i<n_neighs;i++) { 3271 if (n_shared[i] > threshold) { 3272 adjncy[xadj_count] = neighs[i]; 3273 adjncy_wgt[xadj_count] = n_shared[i]; 3274 xadj_count++; 3275 } 3276 } 3277 xadj[1] = xadj_count; 3278 } else { 3279 if (xadj[1]) { 3280 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3281 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3282 } 3283 } 3284 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3285 if (use_square) { 3286 for (i=0;i<xadj[1];i++) { 3287 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3288 } 3289 } 3290 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3291 3292 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3293 3294 /* 3295 Restrict work on active processes only. 3296 */ 3297 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3298 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3299 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3300 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3301 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3302 if (color) { 3303 ierr = PetscFree(xadj);CHKERRQ(ierr); 3304 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3305 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3306 } else { 3307 PetscInt coarsening_ratio; 3308 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3309 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3310 prank = rank; 3311 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3312 /* 3313 for (i=0;i<size;i++) { 3314 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3315 } 3316 */ 3317 for (i=0;i<xadj[1];i++) { 3318 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3319 } 3320 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3321 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3322 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3323 3324 /* Partition */ 3325 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3326 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3327 if (use_vwgt) { 3328 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3329 v_wgt[0] = local_size; 3330 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3331 } 3332 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3333 coarsening_ratio = size/n_subdomains; 3334 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3335 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3336 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3337 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3338 3339 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3340 if (contiguous) { 3341 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3342 } else { 3343 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3344 } 3345 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3346 /* clean up */ 3347 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3348 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3349 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3350 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3351 } 3352 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3353 3354 /* assemble parallel IS for sends */ 3355 i = 1; 3356 if (color) i=0; 3357 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3358 3359 /* get back IS */ 3360 *is_sends = ranks_send_to; 3361 PetscFunctionReturn(0); 3362 } 3363 3364 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3365 3366 #undef __FUNCT__ 3367 #define __FUNCT__ "MatISSubassemble" 3368 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3369 { 3370 Mat local_mat; 3371 Mat_IS *matis; 3372 IS is_sends_internal; 3373 PetscInt rows,cols,new_local_rows; 3374 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3375 PetscBool ismatis,isdense,newisdense,destroy_mat; 3376 ISLocalToGlobalMapping l2gmap; 3377 PetscInt* l2gmap_indices; 3378 const PetscInt* is_indices; 3379 MatType new_local_type; 3380 /* buffers */ 3381 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3382 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3383 PetscInt *recv_buffer_idxs_local; 3384 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3385 /* MPI */ 3386 MPI_Comm comm,comm_n; 3387 PetscSubcomm subcomm; 3388 PetscMPIInt n_sends,n_recvs,commsize; 3389 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3390 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3391 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3392 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3393 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 /* TODO: add missing checks */ 3398 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3399 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3400 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3401 PetscValidLogicalCollectiveInt(mat,nis,7); 3402 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3403 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3404 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3405 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3406 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3407 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3408 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3409 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3410 PetscInt mrows,mcols,mnrows,mncols; 3411 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3412 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3413 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3414 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3415 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3416 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3417 } 3418 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3419 PetscValidLogicalCollectiveInt(mat,bs,0); 3420 /* prepare IS for sending if not provided */ 3421 if (!is_sends) { 3422 PetscBool pcontig = PETSC_TRUE; 3423 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3424 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3425 } else { 3426 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3427 is_sends_internal = is_sends; 3428 } 3429 3430 /* get pointer of MATIS data */ 3431 matis = (Mat_IS*)mat->data; 3432 3433 /* get comm */ 3434 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3435 3436 /* compute number of sends */ 3437 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3438 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3439 3440 /* compute number of receives */ 3441 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3442 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3443 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3444 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3445 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3446 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3447 ierr = PetscFree(iflags);CHKERRQ(ierr); 3448 3449 /* restrict comm if requested */ 3450 subcomm = 0; 3451 destroy_mat = PETSC_FALSE; 3452 if (restrict_comm) { 3453 PetscMPIInt color,subcommsize; 3454 3455 color = 0; 3456 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3457 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3458 subcommsize = commsize - subcommsize; 3459 /* check if reuse has been requested */ 3460 if (reuse == MAT_REUSE_MATRIX) { 3461 if (*mat_n) { 3462 PetscMPIInt subcommsize2; 3463 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3464 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3465 comm_n = PetscObjectComm((PetscObject)*mat_n); 3466 } else { 3467 comm_n = PETSC_COMM_SELF; 3468 } 3469 } else { /* MAT_INITIAL_MATRIX */ 3470 PetscMPIInt rank; 3471 3472 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3473 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3474 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3475 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3476 comm_n = subcomm->comm; 3477 } 3478 /* flag to destroy *mat_n if not significative */ 3479 if (color) destroy_mat = PETSC_TRUE; 3480 } else { 3481 comm_n = comm; 3482 } 3483 3484 /* prepare send/receive buffers */ 3485 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3486 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3487 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3488 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3489 if (nis) { 3490 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3491 } 3492 3493 /* Get data from local matrices */ 3494 if (!isdense) { 3495 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3496 /* TODO: See below some guidelines on how to prepare the local buffers */ 3497 /* 3498 send_buffer_vals should contain the raw values of the local matrix 3499 send_buffer_idxs should contain: 3500 - MatType_PRIVATE type 3501 - PetscInt size_of_l2gmap 3502 - PetscInt global_row_indices[size_of_l2gmap] 3503 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3504 */ 3505 } else { 3506 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3507 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3508 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3509 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3510 send_buffer_idxs[1] = i; 3511 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3512 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3513 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3514 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3515 for (i=0;i<n_sends;i++) { 3516 ilengths_vals[is_indices[i]] = len*len; 3517 ilengths_idxs[is_indices[i]] = len+2; 3518 } 3519 } 3520 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3521 /* additional is (if any) */ 3522 if (nis) { 3523 PetscMPIInt psum; 3524 PetscInt j; 3525 for (j=0,psum=0;j<nis;j++) { 3526 PetscInt plen; 3527 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3528 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3529 psum += len+1; /* indices + lenght */ 3530 } 3531 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3532 for (j=0,psum=0;j<nis;j++) { 3533 PetscInt plen; 3534 const PetscInt *is_array_idxs; 3535 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3536 send_buffer_idxs_is[psum] = plen; 3537 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3538 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3539 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3540 psum += plen+1; /* indices + lenght */ 3541 } 3542 for (i=0;i<n_sends;i++) { 3543 ilengths_idxs_is[is_indices[i]] = psum; 3544 } 3545 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3546 } 3547 3548 buf_size_idxs = 0; 3549 buf_size_vals = 0; 3550 buf_size_idxs_is = 0; 3551 for (i=0;i<n_recvs;i++) { 3552 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3553 buf_size_vals += (PetscInt)olengths_vals[i]; 3554 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3555 } 3556 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3557 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3558 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3559 3560 /* get new tags for clean communications */ 3561 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3563 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3564 3565 /* allocate for requests */ 3566 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3567 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3568 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3569 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3570 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3571 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3572 3573 /* communications */ 3574 ptr_idxs = recv_buffer_idxs; 3575 ptr_vals = recv_buffer_vals; 3576 ptr_idxs_is = recv_buffer_idxs_is; 3577 for (i=0;i<n_recvs;i++) { 3578 source_dest = onodes[i]; 3579 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3580 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3581 ptr_idxs += olengths_idxs[i]; 3582 ptr_vals += olengths_vals[i]; 3583 if (nis) { 3584 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3585 ptr_idxs_is += olengths_idxs_is[i]; 3586 } 3587 } 3588 for (i=0;i<n_sends;i++) { 3589 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3590 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3591 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3592 if (nis) { 3593 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3594 } 3595 } 3596 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3597 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3598 3599 /* assemble new l2g map */ 3600 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3601 ptr_idxs = recv_buffer_idxs; 3602 new_local_rows = 0; 3603 for (i=0;i<n_recvs;i++) { 3604 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3605 ptr_idxs += olengths_idxs[i]; 3606 } 3607 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3608 ptr_idxs = recv_buffer_idxs; 3609 new_local_rows = 0; 3610 for (i=0;i<n_recvs;i++) { 3611 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3612 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3613 ptr_idxs += olengths_idxs[i]; 3614 } 3615 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3616 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3617 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3618 3619 /* infer new local matrix type from received local matrices type */ 3620 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3621 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3622 if (n_recvs) { 3623 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3624 ptr_idxs = recv_buffer_idxs; 3625 for (i=0;i<n_recvs;i++) { 3626 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3627 new_local_type_private = MATAIJ_PRIVATE; 3628 break; 3629 } 3630 ptr_idxs += olengths_idxs[i]; 3631 } 3632 switch (new_local_type_private) { 3633 case MATDENSE_PRIVATE: 3634 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3635 new_local_type = MATSEQAIJ; 3636 bs = 1; 3637 } else { /* if I receive only 1 dense matrix */ 3638 new_local_type = MATSEQDENSE; 3639 bs = 1; 3640 } 3641 break; 3642 case MATAIJ_PRIVATE: 3643 new_local_type = MATSEQAIJ; 3644 bs = 1; 3645 break; 3646 case MATBAIJ_PRIVATE: 3647 new_local_type = MATSEQBAIJ; 3648 break; 3649 case MATSBAIJ_PRIVATE: 3650 new_local_type = MATSEQSBAIJ; 3651 break; 3652 default: 3653 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3654 break; 3655 } 3656 } else { /* by default, new_local_type is seqdense */ 3657 new_local_type = MATSEQDENSE; 3658 bs = 1; 3659 } 3660 3661 /* create MATIS object if needed */ 3662 if (reuse == MAT_INITIAL_MATRIX) { 3663 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3664 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3665 } else { 3666 /* it also destroys the local matrices */ 3667 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3668 } 3669 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3670 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3671 3672 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3673 3674 /* Global to local map of received indices */ 3675 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3676 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3677 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3678 3679 /* restore attributes -> type of incoming data and its size */ 3680 buf_size_idxs = 0; 3681 for (i=0;i<n_recvs;i++) { 3682 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3683 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3684 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3685 } 3686 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3687 3688 /* set preallocation */ 3689 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3690 if (!newisdense) { 3691 PetscInt *new_local_nnz=0; 3692 3693 ptr_vals = recv_buffer_vals; 3694 ptr_idxs = recv_buffer_idxs_local; 3695 if (n_recvs) { 3696 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3697 } 3698 for (i=0;i<n_recvs;i++) { 3699 PetscInt j; 3700 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3701 for (j=0;j<*(ptr_idxs+1);j++) { 3702 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3703 } 3704 } else { 3705 /* TODO */ 3706 } 3707 ptr_idxs += olengths_idxs[i]; 3708 } 3709 if (new_local_nnz) { 3710 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3711 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3712 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3713 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3714 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3715 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3716 } else { 3717 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3718 } 3719 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3720 } else { 3721 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3722 } 3723 3724 /* set values */ 3725 ptr_vals = recv_buffer_vals; 3726 ptr_idxs = recv_buffer_idxs_local; 3727 for (i=0;i<n_recvs;i++) { 3728 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3729 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3730 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3731 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3732 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3733 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3734 } else { 3735 /* TODO */ 3736 } 3737 ptr_idxs += olengths_idxs[i]; 3738 ptr_vals += olengths_vals[i]; 3739 } 3740 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3744 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3745 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3746 3747 #if 0 3748 if (!restrict_comm) { /* check */ 3749 Vec lvec,rvec; 3750 PetscReal infty_error; 3751 3752 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3753 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3754 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3755 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3756 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3757 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3758 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3759 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3760 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3761 } 3762 #endif 3763 3764 /* assemble new additional is (if any) */ 3765 if (nis) { 3766 PetscInt **temp_idxs,*count_is,j,psum; 3767 3768 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3769 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3770 ptr_idxs = recv_buffer_idxs_is; 3771 psum = 0; 3772 for (i=0;i<n_recvs;i++) { 3773 for (j=0;j<nis;j++) { 3774 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3775 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3776 psum += plen; 3777 ptr_idxs += plen+1; /* shift pointer to received data */ 3778 } 3779 } 3780 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3781 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3782 for (i=1;i<nis;i++) { 3783 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3784 } 3785 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3786 ptr_idxs = recv_buffer_idxs_is; 3787 for (i=0;i<n_recvs;i++) { 3788 for (j=0;j<nis;j++) { 3789 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3790 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3791 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3792 ptr_idxs += plen+1; /* shift pointer to received data */ 3793 } 3794 } 3795 for (i=0;i<nis;i++) { 3796 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3797 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3798 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3799 } 3800 ierr = PetscFree(count_is);CHKERRQ(ierr); 3801 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3802 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3803 } 3804 /* free workspace */ 3805 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3806 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3807 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3808 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3809 if (isdense) { 3810 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3811 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3812 } else { 3813 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3814 } 3815 if (nis) { 3816 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3817 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3818 } 3819 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3820 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3821 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3822 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3823 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3824 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3825 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3826 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3827 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3828 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3829 ierr = PetscFree(onodes);CHKERRQ(ierr); 3830 if (nis) { 3831 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3832 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3833 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3834 } 3835 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3836 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3837 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3838 for (i=0;i<nis;i++) { 3839 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3840 } 3841 } 3842 PetscFunctionReturn(0); 3843 } 3844 3845 /* temporary hack into ksp private data structure */ 3846 #include <petsc-private/kspimpl.h> 3847 3848 #undef __FUNCT__ 3849 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3850 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3851 { 3852 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3853 PC_IS *pcis = (PC_IS*)pc->data; 3854 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3855 MatNullSpace CoarseNullSpace=NULL; 3856 ISLocalToGlobalMapping coarse_islg; 3857 IS coarse_is,*isarray; 3858 PetscInt i,im_active=-1,active_procs=-1; 3859 PetscInt nis,nisdofs,nisneu; 3860 PC pc_temp; 3861 PCType coarse_pc_type; 3862 KSPType coarse_ksp_type; 3863 PetscBool multilevel_requested,multilevel_allowed; 3864 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3865 Mat t_coarse_mat_is; 3866 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3867 PetscMPIInt all_procs; 3868 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3869 PetscBool compute_vecs = PETSC_FALSE; 3870 PetscScalar *array; 3871 PetscErrorCode ierr; 3872 3873 PetscFunctionBegin; 3874 /* Assign global numbering to coarse dofs */ 3875 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3876 compute_vecs = PETSC_TRUE; 3877 PetscInt ocoarse_size; 3878 ocoarse_size = pcbddc->coarse_size; 3879 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3880 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3881 /* see if we can avoid some work */ 3882 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3883 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3884 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3885 coarse_reuse = PETSC_FALSE; 3886 } else { /* we can safely reuse already computed coarse matrix */ 3887 coarse_reuse = PETSC_TRUE; 3888 } 3889 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3890 coarse_reuse = PETSC_FALSE; 3891 } 3892 /* reset any subassembling information */ 3893 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3894 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3895 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3896 coarse_reuse = PETSC_TRUE; 3897 } 3898 3899 /* count "active" (i.e. with positive local size) and "void" processes */ 3900 im_active = !!(pcis->n); 3901 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3902 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3903 void_procs = all_procs-active_procs; 3904 csin_type_simple = PETSC_TRUE; 3905 redist = PETSC_FALSE; 3906 if (pcbddc->current_level && void_procs) { 3907 csin_ml = PETSC_TRUE; 3908 ncoarse_ml = void_procs; 3909 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3910 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3911 csin_ds = PETSC_TRUE; 3912 ncoarse_ds = pcbddc->redistribute_coarse; 3913 redist = PETSC_TRUE; 3914 } else { 3915 csin_ds = PETSC_TRUE; 3916 ncoarse_ds = active_procs; 3917 redist = PETSC_TRUE; 3918 } 3919 } else { 3920 csin_ml = PETSC_FALSE; 3921 ncoarse_ml = all_procs; 3922 if (void_procs) { 3923 csin_ds = PETSC_TRUE; 3924 ncoarse_ds = void_procs; 3925 csin_type_simple = PETSC_FALSE; 3926 } else { 3927 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3928 csin_ds = PETSC_TRUE; 3929 ncoarse_ds = pcbddc->redistribute_coarse; 3930 redist = PETSC_TRUE; 3931 } else { 3932 csin_ds = PETSC_FALSE; 3933 ncoarse_ds = all_procs; 3934 } 3935 } 3936 } 3937 3938 /* 3939 test if we can go multilevel: three conditions must be satisfied: 3940 - we have not exceeded the number of levels requested 3941 - we can actually subassemble the active processes 3942 - we can find a suitable number of MPI processes where we can place the subassembled problem 3943 */ 3944 multilevel_allowed = PETSC_FALSE; 3945 multilevel_requested = PETSC_FALSE; 3946 if (pcbddc->current_level < pcbddc->max_levels) { 3947 multilevel_requested = PETSC_TRUE; 3948 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3949 multilevel_allowed = PETSC_FALSE; 3950 } else { 3951 multilevel_allowed = PETSC_TRUE; 3952 } 3953 } 3954 /* determine number of process partecipating to coarse solver */ 3955 if (multilevel_allowed) { 3956 ncoarse = ncoarse_ml; 3957 csin = csin_ml; 3958 } else { 3959 ncoarse = ncoarse_ds; 3960 csin = csin_ds; 3961 } 3962 3963 /* creates temporary l2gmap and IS for coarse indexes */ 3964 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3965 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3966 3967 /* creates temporary MATIS object for coarse matrix */ 3968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3969 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3970 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3971 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3972 #if 0 3973 { 3974 PetscViewer viewer; 3975 char filename[256]; 3976 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3977 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3978 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3979 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3980 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3981 } 3982 #endif 3983 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3984 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3985 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3986 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3987 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3988 3989 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3990 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3991 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3992 const PetscInt *idxs; 3993 ISLocalToGlobalMapping tmap; 3994 3995 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3996 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3997 /* allocate space for temporary storage */ 3998 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3999 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4000 /* allocate for IS array */ 4001 nisdofs = pcbddc->n_ISForDofsLocal; 4002 nisneu = !!pcbddc->NeumannBoundariesLocal; 4003 nis = nisdofs + nisneu; 4004 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4005 /* dofs splitting */ 4006 for (i=0;i<nisdofs;i++) { 4007 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4008 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4009 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4010 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4011 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4012 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4014 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4015 } 4016 /* neumann boundaries */ 4017 if (pcbddc->NeumannBoundariesLocal) { 4018 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4019 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4020 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4021 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4022 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4023 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4024 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4025 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4026 } 4027 /* free memory */ 4028 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4029 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4030 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4031 } else { 4032 nis = 0; 4033 nisdofs = 0; 4034 nisneu = 0; 4035 isarray = NULL; 4036 } 4037 /* destroy no longer needed map */ 4038 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4039 4040 /* restrict on coarse candidates (if needed) */ 4041 coarse_mat_is = NULL; 4042 if (csin) { 4043 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4044 if (redist) { 4045 PetscMPIInt rank; 4046 PetscInt spc,n_spc_p1,dest[1],destsize; 4047 4048 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4049 spc = active_procs/pcbddc->redistribute_coarse; 4050 n_spc_p1 = active_procs%pcbddc->redistribute_coarse; 4051 if (im_active) { 4052 destsize = 1; 4053 if (rank > n_spc_p1*(spc+1)-1) { 4054 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4055 } else { 4056 dest[0] = rank/(spc+1); 4057 } 4058 } else { 4059 destsize = 0; 4060 } 4061 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4062 } else if (csin_type_simple) { 4063 PetscMPIInt rank; 4064 PetscInt issize,isidx; 4065 4066 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4067 if (im_active) { 4068 issize = 1; 4069 isidx = (PetscInt)rank; 4070 } else { 4071 issize = 0; 4072 isidx = -1; 4073 } 4074 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4075 } else { /* get a suitable subassembling pattern from MATIS code */ 4076 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4077 } 4078 4079 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4080 if (!redist || ncoarse <= void_procs) { 4081 PetscInt ncoarse_cand,tissize,*nisindices; 4082 PetscInt *coarse_candidates; 4083 const PetscInt* tisindices; 4084 4085 /* get coarse candidates' ranks in pc communicator */ 4086 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4087 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4088 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4089 if (!coarse_candidates[i]) { 4090 coarse_candidates[ncoarse_cand++]=i; 4091 } 4092 } 4093 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4094 4095 4096 if (pcbddc->dbg_flag) { 4097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4098 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4099 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4101 for (i=0;i<ncoarse_cand;i++) { 4102 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4103 } 4104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4106 } 4107 /* shift the pattern on coarse candidates */ 4108 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4109 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4110 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4111 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4112 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4113 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4114 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4115 } 4116 if (pcbddc->dbg_flag) { 4117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4119 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4121 } 4122 } 4123 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4124 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4125 } else { 4126 if (pcbddc->dbg_flag) { 4127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4128 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4129 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4130 } 4131 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4132 coarse_mat_is = t_coarse_mat_is; 4133 } 4134 4135 /* create local to global scatters for coarse problem */ 4136 if (compute_vecs) { 4137 PetscInt lrows; 4138 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4139 if (coarse_mat_is) { 4140 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4141 } else { 4142 lrows = 0; 4143 } 4144 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4145 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4146 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4147 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4148 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4149 } 4150 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4151 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4152 4153 /* set defaults for coarse KSP and PC */ 4154 if (multilevel_allowed) { 4155 coarse_ksp_type = KSPRICHARDSON; 4156 coarse_pc_type = PCBDDC; 4157 } else { 4158 coarse_ksp_type = KSPPREONLY; 4159 coarse_pc_type = PCREDUNDANT; 4160 } 4161 4162 /* print some info if requested */ 4163 if (pcbddc->dbg_flag) { 4164 if (!multilevel_allowed) { 4165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4166 if (multilevel_requested) { 4167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4168 } else if (pcbddc->max_levels) { 4169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4170 } 4171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4172 } 4173 } 4174 4175 /* create the coarse KSP object only once with defaults */ 4176 if (coarse_mat_is) { 4177 MatReuse coarse_mat_reuse; 4178 PetscViewer dbg_viewer = NULL; 4179 if (pcbddc->dbg_flag) { 4180 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4181 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4182 } 4183 if (!pcbddc->coarse_ksp) { 4184 char prefix[256],str_level[16]; 4185 size_t len; 4186 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4187 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4188 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4189 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4190 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4191 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4192 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4193 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4194 /* prefix */ 4195 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4196 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4197 if (!pcbddc->current_level) { 4198 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4199 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4200 } else { 4201 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4202 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4203 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4204 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4205 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4206 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4207 } 4208 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4209 /* allow user customization */ 4210 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4211 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4212 } 4213 4214 /* get some info after set from options */ 4215 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4216 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4217 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4218 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4219 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4220 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4221 isbddc = PETSC_FALSE; 4222 } 4223 if (isredundant) { 4224 KSP inner_ksp; 4225 PC inner_pc; 4226 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4227 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4228 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4229 } 4230 4231 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4232 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4233 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4234 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4235 if (nisdofs) { 4236 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4237 for (i=0;i<nisdofs;i++) { 4238 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4239 } 4240 } 4241 if (nisneu) { 4242 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4243 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4244 } 4245 4246 /* assemble coarse matrix */ 4247 if (coarse_reuse) { 4248 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4249 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4250 coarse_mat_reuse = MAT_REUSE_MATRIX; 4251 } else { 4252 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4253 } 4254 if (isbddc || isnn) { 4255 if (pcbddc->coarsening_ratio > 1) { 4256 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4257 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4258 if (pcbddc->dbg_flag) { 4259 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4260 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4261 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4262 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4263 } 4264 } 4265 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4266 } else { 4267 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4268 coarse_mat = coarse_mat_is; 4269 } 4270 } else { 4271 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4272 } 4273 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4274 4275 /* propagate symmetry info to coarse matrix */ 4276 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4277 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4278 4279 /* set operators */ 4280 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4281 if (pcbddc->dbg_flag) { 4282 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4283 } 4284 } else { /* processes non partecipating to coarse solver (if any) */ 4285 coarse_mat = 0; 4286 } 4287 ierr = PetscFree(isarray);CHKERRQ(ierr); 4288 #if 0 4289 { 4290 PetscViewer viewer; 4291 char filename[256]; 4292 sprintf(filename,"coarse_mat.m"); 4293 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4294 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4295 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4296 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4297 } 4298 #endif 4299 4300 /* Compute coarse null space (special handling by BDDC only) */ 4301 if (pcbddc->NullSpace) { 4302 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4303 } 4304 4305 if (pcbddc->coarse_ksp) { 4306 Vec crhs,csol; 4307 PetscBool ispreonly; 4308 if (CoarseNullSpace) { 4309 if (isbddc) { 4310 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4311 } else { 4312 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4313 } 4314 } 4315 /* setup coarse ksp */ 4316 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4317 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4318 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4319 /* hack */ 4320 if (!csol) { 4321 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4322 } 4323 if (!crhs) { 4324 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4325 } 4326 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4327 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4328 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4329 KSP check_ksp; 4330 KSPType check_ksp_type; 4331 PC check_pc; 4332 Vec check_vec,coarse_vec; 4333 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4334 PetscInt its; 4335 PetscBool compute_eigs; 4336 PetscReal *eigs_r,*eigs_c; 4337 PetscInt neigs; 4338 const char *prefix; 4339 4340 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4341 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4342 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4343 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4344 if (ispreonly) { 4345 check_ksp_type = KSPPREONLY; 4346 compute_eigs = PETSC_FALSE; 4347 } else { 4348 check_ksp_type = KSPGMRES; 4349 compute_eigs = PETSC_TRUE; 4350 } 4351 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4352 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4353 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4354 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4355 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4356 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4357 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4358 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4359 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4360 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4361 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4362 /* create random vec */ 4363 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4364 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4365 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4366 if (CoarseNullSpace) { 4367 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4368 } 4369 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4370 /* solve coarse problem */ 4371 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4372 if (CoarseNullSpace) { 4373 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4374 } 4375 /* set eigenvalue estimation if preonly has not been requested */ 4376 if (compute_eigs) { 4377 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4378 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4379 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4380 lambda_max = eigs_r[neigs-1]; 4381 lambda_min = eigs_r[0]; 4382 if (pcbddc->use_coarse_estimates) { 4383 if (lambda_max>lambda_min) { 4384 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4385 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4386 } 4387 } 4388 } 4389 4390 /* check coarse problem residual error */ 4391 if (pcbddc->dbg_flag) { 4392 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4393 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4394 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4395 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4396 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4397 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4398 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4399 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4400 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4401 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4403 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4404 if (compute_eigs) { 4405 PetscReal lambda_max_s,lambda_min_s; 4406 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4407 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4408 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4410 for (i=0;i<neigs;i++) { 4411 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4412 } 4413 } 4414 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4415 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4416 } 4417 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4418 if (compute_eigs) { 4419 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4420 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4421 } 4422 } 4423 } 4424 /* print additional info */ 4425 if (pcbddc->dbg_flag) { 4426 /* waits until all processes reaches this point */ 4427 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4430 } 4431 4432 /* free memory */ 4433 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4434 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4435 PetscFunctionReturn(0); 4436 } 4437 4438 #undef __FUNCT__ 4439 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4440 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4441 { 4442 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4443 PC_IS* pcis = (PC_IS*)pc->data; 4444 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4445 PetscInt i,coarse_size; 4446 PetscInt *local_primal_indices; 4447 PetscErrorCode ierr; 4448 4449 PetscFunctionBegin; 4450 /* Compute global number of coarse dofs */ 4451 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4452 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4453 } 4454 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4455 4456 /* check numbering */ 4457 if (pcbddc->dbg_flag) { 4458 PetscScalar coarsesum,*array; 4459 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4460 4461 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4463 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4464 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4465 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4466 for (i=0;i<pcbddc->local_primal_size;i++) { 4467 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4468 } 4469 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4470 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4471 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4472 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4473 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4474 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4475 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4476 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4477 for (i=0;i<pcis->n;i++) { 4478 if (array[i] == 1.0) { 4479 set_error = PETSC_TRUE; 4480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4481 } 4482 } 4483 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4485 for (i=0;i<pcis->n;i++) { 4486 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4487 } 4488 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4489 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4490 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4491 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4492 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4493 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4494 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4495 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4496 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4498 for (i=0;i<pcbddc->local_primal_size;i++) { 4499 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4500 } 4501 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4502 } 4503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 if (set_error_reduced) { 4505 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4506 } 4507 } 4508 /* get back data */ 4509 *coarse_size_n = coarse_size; 4510 *local_primal_indices_n = local_primal_indices; 4511 PetscFunctionReturn(0); 4512 } 4513 4514 #undef __FUNCT__ 4515 #define __FUNCT__ "PCBDDCGlobalToLocal" 4516 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4517 { 4518 IS localis_t; 4519 PetscInt i,lsize,*idxs,n; 4520 PetscScalar *vals; 4521 PetscErrorCode ierr; 4522 4523 PetscFunctionBegin; 4524 /* get indices in local ordering exploiting local to global map */ 4525 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4526 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4527 for (i=0;i<lsize;i++) vals[i] = 1.0; 4528 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4529 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4530 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4531 if (idxs) { /* multilevel guard */ 4532 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4533 } 4534 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4535 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4536 ierr = PetscFree(vals);CHKERRQ(ierr); 4537 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4538 /* now compute set in local ordering */ 4539 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4542 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4543 for (i=0,lsize=0;i<n;i++) { 4544 if (PetscRealPart(vals[i]) > 0.5) { 4545 lsize++; 4546 } 4547 } 4548 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4549 for (i=0,lsize=0;i<n;i++) { 4550 if (PetscRealPart(vals[i]) > 0.5) { 4551 idxs[lsize++] = i; 4552 } 4553 } 4554 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4555 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4556 *localis = localis_t; 4557 PetscFunctionReturn(0); 4558 } 4559 4560 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4561 #undef __FUNCT__ 4562 #define __FUNCT__ "PCBDDCMatMult_Private" 4563 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4564 { 4565 PCBDDCChange_ctx change_ctx; 4566 PetscErrorCode ierr; 4567 4568 PetscFunctionBegin; 4569 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4570 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4571 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4572 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4573 PetscFunctionReturn(0); 4574 } 4575 4576 #undef __FUNCT__ 4577 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4578 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4579 { 4580 PCBDDCChange_ctx change_ctx; 4581 PetscErrorCode ierr; 4582 4583 PetscFunctionBegin; 4584 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4585 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4586 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4587 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4588 PetscFunctionReturn(0); 4589 } 4590 4591 #undef __FUNCT__ 4592 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4593 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4594 { 4595 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4596 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4597 PetscInt *used_xadj,*used_adjncy; 4598 PetscBool free_used_adj; 4599 PetscErrorCode ierr; 4600 4601 PetscFunctionBegin; 4602 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4603 free_used_adj = PETSC_FALSE; 4604 if (pcbddc->sub_schurs_layers == -1) { 4605 used_xadj = NULL; 4606 used_adjncy = NULL; 4607 } else { 4608 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4609 used_xadj = pcbddc->mat_graph->xadj; 4610 used_adjncy = pcbddc->mat_graph->adjncy; 4611 } else if (pcbddc->computed_rowadj) { 4612 used_xadj = pcbddc->mat_graph->xadj; 4613 used_adjncy = pcbddc->mat_graph->adjncy; 4614 } else { 4615 Mat mat_adj; 4616 PetscBool flg_row=PETSC_TRUE; 4617 const PetscInt *xadj,*adjncy; 4618 PetscInt nvtxs; 4619 4620 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4621 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4622 if (!flg_row) { 4623 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4624 } 4625 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4626 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4627 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4628 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4629 if (!flg_row) { 4630 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4631 } 4632 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4633 free_used_adj = PETSC_TRUE; 4634 } 4635 } 4636 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4637 4638 /* free adjacency */ 4639 if (free_used_adj) { 4640 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4641 } 4642 PetscFunctionReturn(0); 4643 } 4644 4645 #undef __FUNCT__ 4646 #define __FUNCT__ "PCBDDCInitSubSchurs" 4647 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4648 { 4649 PC_IS *pcis=(PC_IS*)pc->data; 4650 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4651 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4652 PCBDDCGraph graph; 4653 Mat S_j; 4654 PetscErrorCode ierr; 4655 4656 PetscFunctionBegin; 4657 /* attach interface graph for determining subsets */ 4658 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4659 IS verticesIS; 4660 4661 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4662 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4663 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4664 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4665 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4666 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4667 /* 4668 if (pcbddc->dbg_flag) { 4669 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4670 } 4671 */ 4672 } else { 4673 graph = pcbddc->mat_graph; 4674 } 4675 4676 /* Create Schur complement matrix */ 4677 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4678 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4679 4680 /* sub_schurs init */ 4681 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4682 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4683 /* free graph struct */ 4684 if (pcbddc->sub_schurs_rebuild) { 4685 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4686 } 4687 PetscFunctionReturn(0); 4688 } 4689