1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscBool symmetric; 26 PetscErrorCode ierr; 27 28 PetscFunctionBegin; 29 if (!sub_schurs->use_mumps) { 30 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 31 } 32 ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr); 33 if (!symmetric) { 34 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 35 } 36 37 /* max size of subsets */ 38 mss = 0; 39 for (i=0;i<sub_schurs->n_subs;i++) { 40 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 41 PetscInt subset_size; 42 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 43 mss = PetscMax(mss,subset_size); 44 } 45 } 46 47 /* min/max and threshold */ 48 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 49 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 50 nmax = PetscMax(nmin,nmax); 51 if (pcbddc->adaptive_threshold > 1.0) { 52 thresh = 1.0/pcbddc->adaptive_threshold; 53 } else { 54 thresh = 1.0; 55 } 56 57 /* allocate lapack workspace */ 58 cum = cum2 = 0; 59 maxneigs = 0; 60 for (i=0;i<sub_schurs->n_subs;i++) { 61 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 62 PetscInt n,subset_size; 63 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 64 n = PetscMin(subset_size,nmax); 65 cum += subset_size*n; 66 cum2 += n; 67 maxneigs = PetscMax(maxneigs,n); 68 } 69 } 70 71 if (mss) { 72 if (symmetric) { 73 PetscBLASInt B_itype = 1; 74 PetscBLASInt B_N = mss; 75 PetscScalar zero = 0.0; 76 PetscScalar eps = 0.0; /* dlamch? */ 77 78 B_lwork = -1; 79 S = NULL; 80 St = NULL; 81 eigs = NULL; 82 eigv = NULL; 83 B_iwork = NULL; 84 B_ifail = NULL; 85 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 86 #if defined(PETSC_USE_COMPLEX) 87 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 88 #else 89 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 90 #endif 91 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 92 ierr = PetscFPTrapPop();CHKERRQ(ierr); 93 } else { 94 /* TODO */ 95 } 96 } else { 97 lwork = 0; 98 } 99 100 nv = 0; 101 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 102 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 103 } 104 ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr); 105 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 106 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 107 #if defined(PETSC_USE_COMPLEX) 108 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 109 #endif 110 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 111 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 112 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 113 nv+cum,&pcbddc->adaptive_constraints_idxs, 114 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 115 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 116 117 maxneigs = 0; 118 cum = cum2 = cumarray = 0; 119 if (sub_schurs->is_Ej_com) { 120 const PetscInt *idxs; 121 122 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 123 for (cum=0;cum<nv;cum++) { 124 pcbddc->adaptive_constraints_n[cum] = 1; 125 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 126 pcbddc->adaptive_constraints_ptrs[cum] = cum; 127 pcbddc->adaptive_constraints_data[cum] = 1.0; 128 } 129 cum2 = cum; 130 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 131 } 132 133 if (mss) { /* multilevel */ 134 if (pcbddc->use_deluxe_scaling) { 135 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 136 } else { 137 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 138 } 139 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 140 } 141 142 for (i=0;i<sub_schurs->n_subs;i++) { 143 PetscInt j,subset_size; 144 145 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 146 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 147 const PetscInt *idxs; 148 PetscScalar one = 1.0,zero=0.0; 149 PetscBLASInt B_N; 150 151 /* S should be copied since we need it for deluxe scaling */ 152 if (symmetric) { 153 PetscInt j; 154 for (j=0;j<subset_size;j++) { 155 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 156 } 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 } else { 161 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 162 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 163 } 164 /* is always this the right matrix? */ 165 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 167 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 168 /* St = Starray+cumarray; */ 169 170 if (symmetric) { 171 PetscBLASInt B_itype = 1; 172 PetscBLASInt B_IL = 1, B_IU; 173 PetscScalar eps = -1.0; /* dlamch? */ 174 PetscInt nmin_s; 175 176 /* ask for eigenvalues lower than thresh */ 177 PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs); 178 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 180 #if defined(PETSC_USE_COMPLEX) 181 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 182 #else 183 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 184 #endif 185 ierr = PetscFPTrapPop();CHKERRQ(ierr); 186 if (B_ierr) { 187 if (B_ierr < 0 ) { 188 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 189 } else if (B_ierr <= B_N) { 190 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 191 } else { 192 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 193 } 194 } 195 196 if (B_neigs > nmax) { 197 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax); 198 B_neigs = nmax; 199 } 200 201 nmin_s = PetscMin(nmin,B_N); 202 if (B_neigs < nmin_s) { 203 PetscBLASInt B_neigs2; 204 205 B_IL = B_neigs + 1; 206 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 207 PetscPrintf(PETSC_COMM_SELF,"[%d] found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU); 208 if (symmetric) { 209 PetscInt j; 210 for (j=0;j<subset_size;j++) { 211 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 212 } 213 for (j=0;j<subset_size;j++) { 214 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 215 } 216 } else { 217 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 218 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 219 } 220 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 221 #if defined(PETSC_USE_COMPLEX) 222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 223 #else 224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 225 #endif 226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 227 B_neigs += B_neigs2; 228 } 229 if (B_ierr) { 230 if (B_ierr < 0 ) { 231 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 232 } else if (B_ierr <= B_N) { 233 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 234 } else { 235 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 236 } 237 } 238 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Got %d eigs\n",PetscGlobalRank,B_neigs); 239 for (j=0;j<B_neigs;j++) { 240 if (eigs[j] == 0.0) { 241 PetscPrintf(PETSC_COMM_SELF,"[%d] Inf\n",PetscGlobalRank); 242 } else { 243 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.6e\n",PetscGlobalRank,1.0/eigs[j]); 244 } 245 } 246 } else { 247 /* TODO */ 248 } 249 maxneigs = PetscMax(B_neigs,maxneigs); 250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 251 252 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 253 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N)); 254 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 255 #if 0 256 PetscInt ii; 257 for (ii=0;ii<B_neigs;ii++) { 258 PetscPrintf(PETSC_COMM_SELF,"[%d] -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N); 259 for (j=0;j<B_N;j++) { 260 PetscPrintf(PETSC_COMM_SELF,"[%d] %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]); 261 } 262 } 263 #endif 264 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 265 for (j=0;j<B_neigs;j++) { 266 #if 0 267 { 268 PetscBLASInt Blas_N,Blas_one = 1.0; 269 PetscScalar norm; 270 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 272 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 273 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 274 } else { 275 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 276 } 277 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 278 } 279 #endif 280 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 281 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 282 cum2 += subset_size; 283 } 284 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 } 286 /* shift for next computation */ 287 cumarray += subset_size*subset_size; 288 } 289 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 290 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 291 292 if (mss) { 293 if (pcbddc->use_deluxe_scaling) { 294 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 295 } else { 296 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 297 } 298 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 299 } 300 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 301 #if defined(PETSC_USE_COMPLEX) 302 ierr = PetscFree(rwork);CHKERRQ(ierr); 303 #endif 304 if (pcbddc->dbg_flag) { 305 PetscInt maxneigs_r; 306 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 307 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 308 } 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "PCBDDCSetUpSolvers" 314 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 315 { 316 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 317 PetscScalar *coarse_submat_vals; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 /* Setup local scatters R_to_B and (optionally) R_to_D */ 322 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 323 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 324 325 /* Setup local neumann solver ksp_R */ 326 /* PCBDDCSetUpLocalScatters should be called first! */ 327 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 328 329 /* Change global null space passed in by the user if change of basis has been requested */ 330 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 331 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 332 } 333 334 /* 335 Setup local correction and local part of coarse basis. 336 Gives back the dense local part of the coarse matrix in column major ordering 337 */ 338 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 339 340 /* Compute total number of coarse nodes and setup coarse solver */ 341 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 342 343 /* free */ 344 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "PCBDDCResetCustomization" 350 PetscErrorCode PCBDDCResetCustomization(PC pc) 351 { 352 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 353 PetscErrorCode ierr; 354 355 PetscFunctionBegin; 356 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 357 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 358 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 359 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 360 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 361 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 362 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 363 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 364 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 365 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 366 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetTopography" 372 PetscErrorCode PCBDDCResetTopography(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 379 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 380 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 381 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 382 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 #undef __FUNCT__ 387 #define __FUNCT__ "PCBDDCResetSolvers" 388 PetscErrorCode PCBDDCResetSolvers(PC pc) 389 { 390 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 391 PetscScalar *array; 392 PetscErrorCode ierr; 393 394 PetscFunctionBegin; 395 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 396 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 398 ierr = PetscFree(array);CHKERRQ(ierr); 399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 405 if (pcbddc->local_auxmat2) { 406 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 407 ierr = PetscFree(array);CHKERRQ(ierr); 408 } 409 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 410 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 411 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 412 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 413 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 414 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 415 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 416 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 417 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 418 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 419 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 421 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 422 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 423 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 424 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 431 { 432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 433 PC_IS *pcis = (PC_IS*)pc->data; 434 VecType impVecType; 435 PetscInt n_constraints,n_R,old_size; 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 if (!pcbddc->ConstraintMatrix) { 440 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 441 } 442 /* get sizes */ 443 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 444 n_R = pcis->n-pcbddc->n_actual_vertices; 445 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 446 /* local work vectors (try to avoid unneeded work)*/ 447 /* R nodes */ 448 old_size = -1; 449 if (pcbddc->vec1_R) { 450 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 451 } 452 if (n_R != old_size) { 453 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 454 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 455 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 456 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 457 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 458 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 459 } 460 /* local primal dofs */ 461 old_size = -1; 462 if (pcbddc->vec1_P) { 463 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 464 } 465 if (pcbddc->local_primal_size != old_size) { 466 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 469 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 470 } 471 /* local explicit constraints */ 472 old_size = -1; 473 if (pcbddc->vec1_C) { 474 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 475 } 476 if (n_constraints && n_constraints != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "PCBDDCSetUpCorrection" 487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 488 { 489 PetscErrorCode ierr; 490 /* pointers to pcis and pcbddc */ 491 PC_IS* pcis = (PC_IS*)pc->data; 492 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 493 /* submatrices of local problem */ 494 Mat A_RV,A_VR,A_VV; 495 /* submatrices of local coarse problem */ 496 Mat S_VV,S_CV,S_VC,S_CC; 497 /* working matrices */ 498 Mat C_CR; 499 /* additional working stuff */ 500 PC pc_R; 501 Mat F; 502 PetscBool isLU,isCHOL,isILU; 503 504 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 505 PetscScalar *work; 506 PetscInt *idx_V_B; 507 PetscInt n,n_vertices,n_constraints; 508 PetscInt i,n_R,n_D,n_B; 509 PetscBool unsymmetric_check; 510 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 511 MatType impMatType; 512 /* some shortcuts to scalars */ 513 PetscScalar one=1.0,m_one=-1.0; 514 515 PetscFunctionBegin; 516 /* get number of vertices (corners plus constraints with change of basis) 517 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 518 n_vertices = pcbddc->n_actual_vertices; 519 n_constraints = pcbddc->local_primal_size-n_vertices; 520 /* Set Non-overlapping dimensions */ 521 n_B = pcis->n_B; n_D = pcis->n - n_B; 522 n_R = pcis->n-n_vertices; 523 524 /* Set types for local objects needed by BDDC precondtioner */ 525 impMatType = MATSEQDENSE; 526 527 /* vertices in boundary numbering */ 528 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 529 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 530 if (i != n_vertices) { 531 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 532 } 533 534 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 535 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 537 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 539 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 541 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 542 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 543 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 544 545 unsymmetric_check = PETSC_FALSE; 546 /* allocate workspace */ 547 n = 0; 548 if (n_constraints) { 549 n += n_R*n_constraints; 550 } 551 if (n_vertices) { 552 n = PetscMax(2*n_R*n_vertices,n); 553 } 554 if (!pcbddc->issym) { 555 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 556 unsymmetric_check = PETSC_TRUE; 557 } 558 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 559 560 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 561 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 562 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 563 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 564 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 565 if (isLU || isILU || isCHOL) { 566 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 567 } else { 568 F = NULL; 569 } 570 571 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 572 if (n_constraints) { 573 Mat M1,M2,M3; 574 IS is_aux; 575 /* see if we can save some allocations */ 576 if (pcbddc->local_auxmat2) { 577 PetscInt on_R,on_constraints; 578 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 579 if (on_R != n_R || on_constraints != n_constraints) { 580 PetscScalar *marray; 581 582 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 583 ierr = PetscFree(marray);CHKERRQ(ierr); 584 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 585 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 586 } 587 } 588 /* auxiliary matrices */ 589 if (!pcbddc->local_auxmat2) { 590 PetscScalar *marray; 591 592 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 594 marray += n_R*n_constraints; 595 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 596 } 597 598 /* Extract constraints on R nodes: C_{CR} */ 599 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 600 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 601 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 602 603 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 604 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 605 for (i=0;i<n_constraints;i++) { 606 const PetscScalar *row_cmat_values; 607 const PetscInt *row_cmat_indices; 608 PetscInt size_of_constraint,j; 609 610 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 611 for (j=0;j<size_of_constraint;j++) { 612 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 613 } 614 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 615 } 616 if (F) { 617 Mat B; 618 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 620 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 621 ierr = MatDestroy(&B);CHKERRQ(ierr); 622 } else { 623 PetscScalar *xarray; 624 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 625 for (i=0;i<n_constraints;i++) { 626 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 627 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 628 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 629 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 630 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 631 } 632 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 633 } 634 635 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 636 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 637 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 638 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 639 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 640 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 641 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 642 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 643 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 644 ierr = MatDestroy(&M2);CHKERRQ(ierr); 645 ierr = MatDestroy(&M3);CHKERRQ(ierr); 646 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 647 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 648 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 649 ierr = MatDestroy(&M1);CHKERRQ(ierr); 650 } 651 /* Get submatrices from subdomain matrix */ 652 if (n_vertices) { 653 Mat newmat; 654 IS is_aux; 655 PetscInt ibs,mbs; 656 PetscBool issbaij; 657 658 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 659 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 660 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 661 if (ibs != mbs) { /* need to convert to SEQAIJ */ 662 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 663 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 664 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 665 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 666 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 667 } else { 668 /* this is safe */ 669 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 670 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 671 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 672 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 673 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 674 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 675 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 676 } else { 677 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 678 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 679 } 680 } 681 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 682 } 683 684 /* Matrix of coarse basis functions (local) */ 685 if (pcbddc->coarse_phi_B) { 686 PetscInt on_B,on_primal,on_D=n_D; 687 if (pcbddc->coarse_phi_D) { 688 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 689 } 690 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 691 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 692 PetscScalar *marray; 693 694 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 695 ierr = PetscFree(marray);CHKERRQ(ierr); 696 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 697 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 698 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 699 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 700 } 701 } 702 703 if (!pcbddc->coarse_phi_B) { 704 PetscScalar *marray; 705 706 n = n_B*pcbddc->local_primal_size; 707 if (pcbddc->switch_static || pcbddc->dbg_flag) { 708 n += n_D*pcbddc->local_primal_size; 709 } 710 if (!pcbddc->issym) { 711 n *= 2; 712 } 713 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 714 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 715 n = n_B*pcbddc->local_primal_size; 716 if (pcbddc->switch_static || pcbddc->dbg_flag) { 717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 718 n += n_D*pcbddc->local_primal_size; 719 } 720 if (!pcbddc->issym) { 721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 if (pcbddc->switch_static || pcbddc->dbg_flag) { 723 n = n_B*pcbddc->local_primal_size; 724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 725 } 726 } else { 727 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 728 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 729 if (pcbddc->switch_static || pcbddc->dbg_flag) { 730 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 731 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 732 } 733 } 734 } 735 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 736 /* vertices */ 737 if (n_vertices) { 738 739 if (n_R) { 740 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 741 PetscBLASInt B_N,B_one = 1; 742 PetscScalar *x,*y; 743 744 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 746 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 747 if (F) { 748 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 749 } else { 750 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 751 for (i=0;i<n_vertices;i++) { 752 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 753 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 754 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 755 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 756 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 757 } 758 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 759 } 760 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 761 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 762 if (n_constraints) { 763 Mat B; 764 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 766 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 767 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 768 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 769 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 770 ierr = MatDestroy(&B);CHKERRQ(ierr); 771 } 772 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 773 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 774 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 775 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 776 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 777 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 778 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 779 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 780 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 781 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 782 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 783 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 784 } else { 785 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 786 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 787 } 788 /* coarse basis functions */ 789 for (i=0;i<n_vertices;i++) { 790 PetscScalar *y; 791 792 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 793 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 794 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 795 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 y[n_B*i+idx_V_B[i]] = 1.0; 798 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 799 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 800 801 if (pcbddc->switch_static || pcbddc->dbg_flag) { 802 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 803 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 804 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 807 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 808 } 809 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 810 } 811 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 812 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 813 } 814 815 if (n_constraints) { 816 Mat B; 817 818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 819 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 820 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 821 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 822 if (n_vertices) { 823 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 824 } 825 ierr = MatDestroy(&B);CHKERRQ(ierr); 826 /* coarse basis functions */ 827 for (i=0;i<n_constraints;i++) { 828 PetscScalar *y; 829 830 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 831 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 832 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 833 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 836 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 837 838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 839 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 840 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 841 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 844 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 845 } 846 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 847 } 848 } 849 850 /* compute other basis functions for non-symmetric problems */ 851 if (!pcbddc->issym) { 852 Mat B,X; 853 854 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 855 856 if (n_constraints) { 857 Mat S_CCT,B_C; 858 859 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 860 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 861 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 862 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 863 if (n_vertices) { 864 Mat B_V,S_VCT; 865 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 867 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 868 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 869 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 870 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 871 } 872 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 873 } 874 if (n_vertices && n_R) { 875 Mat A_VRT; 876 PetscBLASInt B_N,B_one = 1; 877 878 if (!n_constraints) { /* if there are no constraints, reset work */ 879 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 880 } 881 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 882 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 883 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 884 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 885 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 886 } 887 888 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 889 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 890 for (i=0;i<pcbddc->local_primal_size;i++) { 891 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 892 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 893 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 894 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 895 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 896 } 897 } else { 898 for (i=0;i<pcbddc->local_primal_size;i++) { 899 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 900 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 901 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 902 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 903 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 904 } 905 } 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 /* coarse basis functions */ 908 for (i=0;i<pcbddc->local_primal_size;i++) { 909 PetscScalar *y; 910 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 912 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 if (i<n_vertices) { 917 y[n_B*i+idx_V_B[i]] = 1.0; 918 } 919 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 920 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 921 922 if (pcbddc->switch_static || pcbddc->dbg_flag) { 923 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 924 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 925 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 928 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 929 } 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 } 932 ierr = MatDestroy(&X);CHKERRQ(ierr); 933 } 934 935 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 936 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 938 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 939 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 940 /* Checking coarse_sub_mat and coarse basis functios */ 941 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 942 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 943 if (pcbddc->dbg_flag) { 944 Mat coarse_sub_mat; 945 Mat AUXMAT,TM1,TM2,TM3,TM4; 946 Mat coarse_phi_D,coarse_phi_B; 947 Mat coarse_psi_D,coarse_psi_B; 948 Mat A_II,A_BB,A_IB,A_BI; 949 MatType checkmattype=MATSEQAIJ; 950 PetscReal real_value; 951 952 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 953 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 954 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 955 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 956 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 957 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 958 if (unsymmetric_check) { 959 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 960 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 961 } 962 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 963 964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 966 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 967 if (unsymmetric_check) { 968 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 969 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 970 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 971 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 972 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 973 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 974 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 975 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 976 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 977 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 978 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 979 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 980 } else { 981 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 982 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 983 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 984 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 985 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 986 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 987 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 988 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 989 } 990 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 991 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 992 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 993 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 994 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 995 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 996 997 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1000 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1001 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1002 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1003 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1004 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1005 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1006 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1007 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1008 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1009 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1010 if (unsymmetric_check) { 1011 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1012 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1013 } 1014 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1015 } 1016 1017 /* free memory */ 1018 ierr = PetscFree(work);CHKERRQ(ierr); 1019 if (n_vertices) { 1020 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1021 } 1022 if (n_constraints) { 1023 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1024 } 1025 /* get back data */ 1026 *coarse_submat_vals_n = coarse_submat_vals; 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1032 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 1033 { 1034 Mat *work_mat; 1035 IS isrow_s,iscol_s; 1036 PetscBool rsorted,csorted; 1037 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1038 PetscErrorCode ierr; 1039 1040 PetscFunctionBegin; 1041 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1042 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1043 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1044 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1045 1046 if (!rsorted) { 1047 const PetscInt *idxs; 1048 PetscInt *idxs_sorted,i; 1049 1050 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1052 for (i=0;i<rsize;i++) { 1053 idxs_perm_r[i] = i; 1054 } 1055 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1056 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1057 for (i=0;i<rsize;i++) { 1058 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1059 } 1060 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1061 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1062 } else { 1063 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1064 isrow_s = isrow; 1065 } 1066 1067 if (!csorted) { 1068 if (isrow == iscol) { 1069 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1070 iscol_s = isrow_s; 1071 } else { 1072 const PetscInt *idxs; 1073 PetscInt *idxs_sorted,i; 1074 1075 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1077 for (i=0;i<csize;i++) { 1078 idxs_perm_c[i] = i; 1079 } 1080 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1081 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1082 for (i=0;i<csize;i++) { 1083 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1084 } 1085 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1087 } 1088 } else { 1089 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1090 iscol_s = iscol; 1091 } 1092 1093 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 1094 1095 if (!rsorted || !csorted) { 1096 Mat new_mat; 1097 IS is_perm_r,is_perm_c; 1098 1099 if (!rsorted) { 1100 PetscInt *idxs_r,i; 1101 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1102 for (i=0;i<rsize;i++) { 1103 idxs_r[idxs_perm_r[i]] = i; 1104 } 1105 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1107 } else { 1108 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1109 } 1110 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1111 1112 if (!csorted) { 1113 if (isrow_s == iscol_s) { 1114 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1115 is_perm_c = is_perm_r; 1116 } else { 1117 PetscInt *idxs_c,i; 1118 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_c[idxs_perm_c[i]] = i; 1121 } 1122 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1127 } 1128 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1129 1130 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1131 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1132 work_mat[0] = new_mat; 1133 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1134 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1135 } 1136 1137 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1138 *B = work_mat[0]; 1139 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1140 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1141 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1147 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1148 { 1149 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1150 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1151 Mat new_mat; 1152 IS is_local,is_global; 1153 PetscInt local_size; 1154 PetscBool isseqaij; 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1159 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1160 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1161 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1162 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1163 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 1164 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1165 1166 /* check */ 1167 if (pcbddc->dbg_flag) { 1168 Vec x,x_change; 1169 PetscReal error; 1170 1171 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1172 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1173 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1174 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1175 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1176 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1177 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1178 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1179 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1180 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1181 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1182 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1183 ierr = VecDestroy(&x);CHKERRQ(ierr); 1184 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1185 } 1186 1187 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1188 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1189 if (isseqaij) { 1190 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1191 } else { 1192 Mat work_mat; 1193 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1194 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1195 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1196 } 1197 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1198 /* 1199 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1200 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1201 */ 1202 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1208 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1209 { 1210 PC_IS* pcis = (PC_IS*)(pc->data); 1211 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1212 IS is_aux1,is_aux2; 1213 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1214 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1215 PetscInt vbs,bs; 1216 PetscBT bitmask; 1217 PetscErrorCode ierr; 1218 1219 PetscFunctionBegin; 1220 /* 1221 No need to setup local scatters if 1222 - primal space is unchanged 1223 AND 1224 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1225 AND 1226 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1227 */ 1228 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1229 PetscFunctionReturn(0); 1230 } 1231 /* destroy old objects */ 1232 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1233 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1234 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1235 /* Set Non-overlapping dimensions */ 1236 n_B = pcis->n_B; n_D = pcis->n - n_B; 1237 n_vertices = pcbddc->n_actual_vertices; 1238 /* create auxiliary bitmask */ 1239 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1240 for (i=0;i<n_vertices;i++) { 1241 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1242 } 1243 1244 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1245 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1246 for (i=0, n_R=0; i<pcis->n; i++) { 1247 if (!PetscBTLookup(bitmask,i)) { 1248 idx_R_local[n_R] = i; 1249 n_R++; 1250 } 1251 } 1252 1253 /* Block code */ 1254 vbs = 1; 1255 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1256 if (bs>1 && !(n_vertices%bs)) { 1257 PetscBool is_blocked = PETSC_TRUE; 1258 PetscInt *vary; 1259 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1260 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1261 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1262 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1263 for (i=0; i<n_vertices; i++) { 1264 if (vary[i]!=0 && vary[i]!=bs) { 1265 is_blocked = PETSC_FALSE; 1266 break; 1267 } 1268 } 1269 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1270 vbs = bs; 1271 for (i=0;i<n_R/vbs;i++) { 1272 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1273 } 1274 } 1275 ierr = PetscFree(vary);CHKERRQ(ierr); 1276 } 1277 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1278 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1279 1280 /* print some info if requested */ 1281 if (pcbddc->dbg_flag) { 1282 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1283 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1284 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1285 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1288 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1289 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1290 } 1291 1292 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1293 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1294 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1295 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1296 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1297 for (i=0; i<n_D; i++) { 1298 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1299 } 1300 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1301 for (i=0, j=0; i<n_R; i++) { 1302 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1303 aux_array1[j++] = i; 1304 } 1305 } 1306 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1307 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1308 for (i=0, j=0; i<n_B; i++) { 1309 if (!PetscBTLookup(bitmask,is_indices[i])) { 1310 aux_array2[j++] = i; 1311 } 1312 } 1313 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1314 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1315 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1316 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1317 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1318 1319 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1320 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1321 for (i=0, j=0; i<n_R; i++) { 1322 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1323 aux_array1[j++] = i; 1324 } 1325 } 1326 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1327 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1328 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1329 } 1330 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1331 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1338 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1339 { 1340 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1341 PC_IS *pcis = (PC_IS*)pc->data; 1342 PC pc_temp; 1343 Mat A_RR; 1344 MatReuse reuse; 1345 PetscScalar m_one = -1.0; 1346 PetscReal value; 1347 PetscInt n_D,n_R,ibs,mbs; 1348 PetscBool use_exact,use_exact_reduced,issbaij; 1349 PetscErrorCode ierr; 1350 /* prefixes stuff */ 1351 char dir_prefix[256],neu_prefix[256],str_level[16]; 1352 size_t len; 1353 1354 PetscFunctionBegin; 1355 1356 /* compute prefixes */ 1357 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1358 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1359 if (!pcbddc->current_level) { 1360 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1361 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1362 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1363 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1364 } else { 1365 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1366 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1367 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1368 len -= 15; /* remove "pc_bddc_coarse_" */ 1369 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1370 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1371 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1372 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1373 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1374 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1375 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1376 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1377 } 1378 1379 /* DIRICHLET PROBLEM */ 1380 if (dirichlet) { 1381 /* Matrix for Dirichlet problem is pcis->A_II */ 1382 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1383 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1384 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1385 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1386 /* default */ 1387 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1388 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1390 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1391 if (issbaij) { 1392 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1393 } else { 1394 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1395 } 1396 /* Allow user's customization */ 1397 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1398 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1399 } 1400 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1401 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1402 if (!n_D) { 1403 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1404 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1405 } 1406 /* Set Up KSP for Dirichlet problem of BDDC */ 1407 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1408 /* set ksp_D into pcis data */ 1409 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1410 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1411 pcis->ksp_D = pcbddc->ksp_D; 1412 } 1413 1414 /* NEUMANN PROBLEM */ 1415 A_RR = 0; 1416 if (neumann) { 1417 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1418 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1419 if (pcbddc->ksp_R) { /* already created ksp */ 1420 PetscInt nn_R; 1421 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1422 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1423 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1424 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1425 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1426 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1427 reuse = MAT_INITIAL_MATRIX; 1428 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1429 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1430 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1431 reuse = MAT_INITIAL_MATRIX; 1432 } else { /* safe to reuse the matrix */ 1433 reuse = MAT_REUSE_MATRIX; 1434 } 1435 } 1436 /* last check */ 1437 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1438 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1439 reuse = MAT_INITIAL_MATRIX; 1440 } 1441 } else { /* first time, so we need to create the matrix */ 1442 reuse = MAT_INITIAL_MATRIX; 1443 } 1444 /* extract A_RR */ 1445 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1446 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1447 if (ibs != mbs) { 1448 Mat newmat; 1449 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1450 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1451 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1454 } 1455 if (!pcbddc->ksp_R) { /* create object if not present */ 1456 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1457 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1458 /* default */ 1459 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1460 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1461 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1463 if (issbaij) { 1464 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1465 } else { 1466 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1467 } 1468 /* Allow user's customization */ 1469 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1470 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1471 } 1472 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1473 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1474 if (!n_R) { 1475 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1476 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1477 } 1478 /* Set Up KSP for Neumann problem of BDDC */ 1479 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1480 } 1481 1482 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1483 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1484 if (pcbddc->dbg_flag) { 1485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1486 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1487 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1488 } 1489 if (dirichlet) { /* Dirichlet */ 1490 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1491 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1492 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1493 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1494 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1495 /* need to be adapted? */ 1496 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1497 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1498 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1499 /* print info */ 1500 if (pcbddc->dbg_flag) { 1501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1503 } 1504 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1505 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1506 } 1507 } 1508 if (neumann) { /* Neumann */ 1509 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1510 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1511 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1512 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1513 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1514 /* need to be adapted? */ 1515 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1516 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1517 /* print info */ 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1520 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1521 } 1522 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1523 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1524 } 1525 } 1526 } 1527 /* free Neumann problem's matrix */ 1528 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1534 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1535 { 1536 PetscErrorCode ierr; 1537 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1538 1539 PetscFunctionBegin; 1540 if (applytranspose) { 1541 if (pcbddc->local_auxmat1) { 1542 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1543 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1544 } 1545 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1546 } else { 1547 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1548 if (pcbddc->local_auxmat1) { 1549 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1550 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1551 } 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 1556 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1559 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1560 { 1561 PetscErrorCode ierr; 1562 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1563 PC_IS* pcis = (PC_IS*) (pc->data); 1564 const PetscScalar zero = 0.0; 1565 1566 PetscFunctionBegin; 1567 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1568 if (applytranspose) { 1569 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1570 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1571 } else { 1572 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1573 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1574 } 1575 /* start communications from local primal nodes to rhs of coarse solver */ 1576 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1577 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 1580 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1581 /* TODO remove null space when doing multilevel */ 1582 if (pcbddc->coarse_ksp) { 1583 if (applytranspose) { 1584 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1585 } else { 1586 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1587 } 1588 } 1589 1590 /* Local solution on R nodes */ 1591 if (pcis->n) { 1592 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1593 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1594 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1595 if (pcbddc->switch_static) { 1596 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1597 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1598 } 1599 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1600 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1601 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 if (pcbddc->switch_static) { 1604 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 } 1607 } 1608 1609 /* communications from coarse sol to local primal nodes */ 1610 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1611 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 1613 /* Sum contributions from two levels */ 1614 if (applytranspose) { 1615 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1616 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1617 } else { 1618 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1620 } 1621 PetscFunctionReturn(0); 1622 } 1623 1624 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1627 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1628 { 1629 PetscErrorCode ierr; 1630 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1631 PetscScalar *array,*array2; 1632 Vec from,to; 1633 1634 PetscFunctionBegin; 1635 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1636 from = pcbddc->coarse_vec; 1637 to = pcbddc->vec1_P; 1638 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1639 Vec tvec; 1640 PetscInt lsize; 1641 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1642 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1643 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1644 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1645 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1646 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1647 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1648 } 1649 } else { /* from local to global -> put data in coarse right hand side */ 1650 from = pcbddc->vec1_P; 1651 to = pcbddc->coarse_vec; 1652 } 1653 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1659 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1660 { 1661 PetscErrorCode ierr; 1662 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1663 PetscScalar *array,*array2; 1664 Vec from,to; 1665 1666 PetscFunctionBegin; 1667 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1668 from = pcbddc->coarse_vec; 1669 to = pcbddc->vec1_P; 1670 } else { /* from local to global -> put data in coarse right hand side */ 1671 from = pcbddc->vec1_P; 1672 to = pcbddc->coarse_vec; 1673 } 1674 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1675 if (smode == SCATTER_FORWARD) { 1676 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1677 Vec tvec; 1678 PetscInt lsize; 1679 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1680 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1681 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1682 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1683 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1684 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1685 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1686 } 1687 } 1688 PetscFunctionReturn(0); 1689 } 1690 1691 /* uncomment for testing purposes */ 1692 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1695 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1696 { 1697 PetscErrorCode ierr; 1698 PC_IS* pcis = (PC_IS*)(pc->data); 1699 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1700 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1701 /* one and zero */ 1702 PetscScalar one=1.0,zero=0.0; 1703 /* space to store constraints and their local indices */ 1704 PetscScalar *temp_quadrature_constraint; 1705 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1706 /* iterators */ 1707 PetscInt i,j,k,total_counts,temp_start_ptr; 1708 /* stuff to store connected components stored in pcbddc->mat_graph */ 1709 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1710 PetscInt n_ISForFaces,n_ISForEdges; 1711 /* near null space stuff */ 1712 MatNullSpace nearnullsp; 1713 const Vec *nearnullvecs; 1714 Vec *localnearnullsp; 1715 PetscBool nnsp_has_cnst; 1716 PetscInt nnsp_size; 1717 PetscScalar *array; 1718 /* BLAS integers */ 1719 PetscBLASInt lwork,lierr; 1720 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1721 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1722 /* LAPACK working arrays for SVD or POD */ 1723 PetscBool skip_lapack; 1724 PetscScalar *work; 1725 PetscReal *singular_vals; 1726 #if defined(PETSC_USE_COMPLEX) 1727 PetscReal *rwork; 1728 #endif 1729 #if defined(PETSC_MISSING_LAPACK_GESVD) 1730 PetscBLASInt Blas_one_2=1; 1731 PetscScalar *temp_basis,*correlation_mat; 1732 #else 1733 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1734 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1735 #endif 1736 /* reuse */ 1737 PetscInt olocal_primal_size; 1738 PetscInt *oprimal_indices_local_idxs; 1739 /* change of basis */ 1740 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1741 PetscBool boolforchange,qr_needed; 1742 PetscBT touched,change_basis,qr_needed_idx; 1743 /* auxiliary stuff */ 1744 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1745 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1746 PetscScalar *temp_quadrature_constraint_work=NULL; 1747 /* some quantities */ 1748 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1749 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1750 1751 1752 PetscFunctionBegin; 1753 /* Destroy Mat objects computed previously */ 1754 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1755 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1756 /* TODO synch with adaptive selection */ 1757 /* Get index sets for faces, edges and vertices from graph */ 1758 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1759 /* free unneeded index sets */ 1760 if (!pcbddc->use_vertices) { 1761 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1762 } 1763 if (!pcbddc->use_edges) { 1764 for (i=0;i<n_ISForEdges;i++) { 1765 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1766 } 1767 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1768 n_ISForEdges = 0; 1769 } 1770 if (!pcbddc->use_faces) { 1771 for (i=0;i<n_ISForFaces;i++) { 1772 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1773 } 1774 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1775 n_ISForFaces = 0; 1776 } 1777 /* HACKS (the following two blocks of code) */ 1778 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1779 pcbddc->use_change_of_basis = PETSC_TRUE; 1780 if (!ISForEdges) { 1781 pcbddc->use_change_on_faces = PETSC_TRUE; 1782 } 1783 } 1784 if (pcbddc->NullSpace) { 1785 /* use_change_of_basis should be consistent among processors */ 1786 PetscBool tbool[2],gbool[2]; 1787 tbool [0] = pcbddc->use_change_of_basis; 1788 tbool [1] = pcbddc->use_change_on_faces; 1789 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1790 pcbddc->use_change_of_basis = gbool[0]; 1791 pcbddc->use_change_on_faces = gbool[1]; 1792 } 1793 /* print some info */ 1794 if (pcbddc->dbg_flag) { 1795 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1796 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1797 i = 0; 1798 if (ISForVertices) { 1799 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1800 } 1801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1805 } 1806 1807 if (!pcbddc->adaptive_selection) { 1808 /* check if near null space is attached to global mat */ 1809 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1810 if (nearnullsp) { 1811 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1812 /* remove any stored info */ 1813 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1814 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1815 /* store information for BDDC solver reuse */ 1816 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1817 pcbddc->onearnullspace = nearnullsp; 1818 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1819 for (i=0;i<nnsp_size;i++) { 1820 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1821 } 1822 } else { /* if near null space is not provided BDDC uses constants by default */ 1823 nnsp_size = 0; 1824 nnsp_has_cnst = PETSC_TRUE; 1825 } 1826 /* get max number of constraints on a single cc */ 1827 max_constraints = nnsp_size; 1828 if (nnsp_has_cnst) max_constraints++; 1829 1830 /* 1831 Evaluate maximum storage size needed by the procedure 1832 - temp_indices will contain start index of each constraint stored as follows 1833 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1834 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1835 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1836 */ 1837 total_counts = n_ISForFaces+n_ISForEdges; 1838 total_counts *= max_constraints; 1839 n_vertices = 0; 1840 if (ISForVertices) { 1841 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1842 } 1843 total_counts += n_vertices; 1844 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1845 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1846 total_counts = 0; 1847 max_size_of_constraint = 0; 1848 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1849 if (i<n_ISForEdges) { 1850 used_IS = &ISForEdges[i]; 1851 } else { 1852 used_IS = &ISForFaces[i-n_ISForEdges]; 1853 } 1854 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1855 total_counts += j; 1856 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1857 } 1858 total_counts *= max_constraints; 1859 total_counts += n_vertices; 1860 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1861 /* get local part of global near null space vectors */ 1862 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1863 for (k=0;k<nnsp_size;k++) { 1864 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1865 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1866 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 } 1868 1869 /* whether or not to skip lapack calls */ 1870 skip_lapack = PETSC_TRUE; 1871 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1872 1873 /* allocate some auxiliary stuff */ 1874 if (!skip_lapack || pcbddc->use_qr_single) { 1875 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1876 } 1877 1878 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1879 if (!skip_lapack) { 1880 PetscScalar temp_work; 1881 1882 #if defined(PETSC_MISSING_LAPACK_GESVD) 1883 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1884 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1885 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1886 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1887 #if defined(PETSC_USE_COMPLEX) 1888 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1889 #endif 1890 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1891 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1892 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1893 lwork = -1; 1894 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1895 #if !defined(PETSC_USE_COMPLEX) 1896 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1897 #else 1898 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1899 #endif 1900 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1901 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1902 #else /* on missing GESVD */ 1903 /* SVD */ 1904 PetscInt max_n,min_n; 1905 max_n = max_size_of_constraint; 1906 min_n = max_constraints; 1907 if (max_size_of_constraint < max_constraints) { 1908 min_n = max_size_of_constraint; 1909 max_n = max_constraints; 1910 } 1911 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1912 #if defined(PETSC_USE_COMPLEX) 1913 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1914 #endif 1915 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1916 lwork = -1; 1917 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1918 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1919 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1920 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1921 #if !defined(PETSC_USE_COMPLEX) 1922 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1923 #else 1924 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1925 #endif 1926 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1928 #endif /* on missing GESVD */ 1929 /* Allocate optimal workspace */ 1930 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1931 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1932 } 1933 /* Now we can loop on constraining sets */ 1934 total_counts = 0; 1935 temp_indices[0] = 0; 1936 /* vertices */ 1937 if (ISForVertices) { 1938 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1939 if (nnsp_has_cnst) { /* consider all vertices */ 1940 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1941 for (i=0;i<n_vertices;i++) { 1942 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1943 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1944 total_counts++; 1945 } 1946 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1947 PetscBool used_vertex; 1948 for (i=0;i<n_vertices;i++) { 1949 used_vertex = PETSC_FALSE; 1950 k = 0; 1951 while (!used_vertex && k<nnsp_size) { 1952 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1953 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1954 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1955 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1956 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1957 total_counts++; 1958 used_vertex = PETSC_TRUE; 1959 } 1960 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1961 k++; 1962 } 1963 } 1964 } 1965 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1966 n_vertices = total_counts; 1967 } 1968 1969 /* edges and faces */ 1970 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1971 if (ncc<n_ISForEdges) { 1972 used_IS = &ISForEdges[ncc]; 1973 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1974 } else { 1975 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1976 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1977 } 1978 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1979 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1980 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1981 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1982 /* change of basis should not be performed on local periodic nodes */ 1983 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1984 if (nnsp_has_cnst) { 1985 PetscScalar quad_value; 1986 temp_constraints++; 1987 if (!pcbddc->use_nnsp_true) { 1988 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1989 } else { 1990 quad_value = 1.0; 1991 } 1992 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1993 for (j=0;j<size_of_constraint;j++) { 1994 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1995 } 1996 /* sort by global ordering if using lapack subroutines (not needed!) */ 1997 if (!skip_lapack || pcbddc->use_qr_single) { 1998 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1999 for (j=0;j<size_of_constraint;j++) { 2000 permutation[j]=j; 2001 } 2002 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2003 for (j=0;j<size_of_constraint;j++) { 2004 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2005 } 2006 for (j=0;j<size_of_constraint;j++) { 2007 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2008 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2009 } 2010 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2011 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2012 } 2013 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2014 total_counts++; 2015 } 2016 for (k=0;k<nnsp_size;k++) { 2017 PetscReal real_value; 2018 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2019 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2020 for (j=0;j<size_of_constraint;j++) { 2021 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2022 } 2023 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2024 /* check if array is null on the connected component */ 2025 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2026 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2027 if (real_value > 0.0) { /* keep indices and values */ 2028 /* sort by global ordering if using lapack subroutines */ 2029 if (!skip_lapack || pcbddc->use_qr_single) { 2030 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2031 for (j=0;j<size_of_constraint;j++) { 2032 permutation[j]=j; 2033 } 2034 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2035 for (j=0;j<size_of_constraint;j++) { 2036 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2037 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2038 } 2039 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2040 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2041 } 2042 temp_constraints++; 2043 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2044 total_counts++; 2045 } 2046 } 2047 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2048 valid_constraints = temp_constraints; 2049 if (!pcbddc->use_nnsp_true && temp_constraints) { 2050 if (temp_constraints == 1) { /* just normalize the constraint */ 2051 PetscScalar norm; 2052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2053 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2054 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2055 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2056 } else { /* perform SVD */ 2057 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2058 2059 #if defined(PETSC_MISSING_LAPACK_GESVD) 2060 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2061 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2062 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2063 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2064 from that computed using LAPACKgesvd 2065 -> This is due to a different computation of eigenvectors in LAPACKheev 2066 -> The quality of the POD-computed basis will be the same */ 2067 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2068 /* Store upper triangular part of correlation matrix */ 2069 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2070 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2071 for (j=0;j<temp_constraints;j++) { 2072 for (k=0;k<j+1;k++) { 2073 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 2074 } 2075 } 2076 /* compute eigenvalues and eigenvectors of correlation matrix */ 2077 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2078 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2079 #if !defined(PETSC_USE_COMPLEX) 2080 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2081 #else 2082 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2083 #endif 2084 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2085 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2086 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2087 j = 0; 2088 while (j < temp_constraints && singular_vals[j] < tol) j++; 2089 total_counts = total_counts-j; 2090 valid_constraints = temp_constraints-j; 2091 /* scale and copy POD basis into used quadrature memory */ 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2093 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2094 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2096 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2097 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2098 if (j<temp_constraints) { 2099 PetscInt ii; 2100 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2101 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2102 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2103 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2104 for (k=0;k<temp_constraints-j;k++) { 2105 for (ii=0;ii<size_of_constraint;ii++) { 2106 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2107 } 2108 } 2109 } 2110 #else /* on missing GESVD */ 2111 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2112 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2114 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2115 #if !defined(PETSC_USE_COMPLEX) 2116 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 2117 #else 2118 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 2119 #endif 2120 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2121 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2122 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2123 k = temp_constraints; 2124 if (k > size_of_constraint) k = size_of_constraint; 2125 j = 0; 2126 while (j < k && singular_vals[k-j-1] < tol) j++; 2127 valid_constraints = k-j; 2128 total_counts = total_counts-temp_constraints+valid_constraints; 2129 #endif /* on missing GESVD */ 2130 } 2131 } 2132 /* setting change_of_basis flag is safe now */ 2133 if (boolforchange) { 2134 for (j=0;j<valid_constraints;j++) { 2135 PetscBTSet(change_basis,total_counts-j-1); 2136 } 2137 } 2138 } 2139 /* free workspace */ 2140 if (!skip_lapack || pcbddc->use_qr_single) { 2141 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2142 } 2143 if (!skip_lapack) { 2144 ierr = PetscFree(work);CHKERRQ(ierr); 2145 #if defined(PETSC_USE_COMPLEX) 2146 ierr = PetscFree(rwork);CHKERRQ(ierr); 2147 #endif 2148 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2149 #if defined(PETSC_MISSING_LAPACK_GESVD) 2150 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2151 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2152 #endif 2153 } 2154 for (k=0;k<nnsp_size;k++) { 2155 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2156 } 2157 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2158 } else { 2159 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2160 PetscInt cum = 0; 2161 2162 total_counts = 0; 2163 n_vertices = 0; 2164 if (sub_schurs->is_Ej_com) { 2165 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2166 } 2167 max_constraints = 0; 2168 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2169 total_counts += pcbddc->adaptive_constraints_n[i]; 2170 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2171 } 2172 temp_indices = pcbddc->adaptive_constraints_ptrs; 2173 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2174 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2175 2176 #if 0 2177 printf("Found %d totals\n",total_counts); 2178 for (i=0;i<total_counts;i++) { 2179 printf("const %d, start %d",i,temp_indices[i]); 2180 printf(" end %d:\n",temp_indices[i+1]); 2181 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2182 printf(" idxs %d",temp_indices_to_constraint[j]); 2183 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2184 } 2185 } 2186 for (i=0;i<n_vertices;i++) { 2187 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2188 } 2189 for (i=0;i<sub_schurs->n_subs;i++) { 2190 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2191 } 2192 #endif 2193 2194 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2195 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2196 /* Change of basis */ 2197 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2198 if (pcbddc->use_change_of_basis) { 2199 cum = n_vertices; 2200 for (i=0;i<sub_schurs->n_subs;i++) { 2201 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2202 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2203 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2204 } 2205 } 2206 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2207 } 2208 } 2209 } 2210 2211 /* free index sets of faces, edges and vertices */ 2212 for (i=0;i<n_ISForFaces;i++) { 2213 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2214 } 2215 if (n_ISForFaces) { 2216 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2217 } 2218 for (i=0;i<n_ISForEdges;i++) { 2219 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2220 } 2221 if (n_ISForEdges) { 2222 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2223 } 2224 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2225 2226 /* map temp_indices_to_constraint in boundary numbering */ 2227 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2228 if (i != temp_indices[total_counts]) { 2229 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2230 } 2231 2232 /* set quantities in pcbddc data structure and store previous primal size */ 2233 /* n_vertices defines the number of subdomain corners in the primal space */ 2234 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2235 olocal_primal_size = pcbddc->local_primal_size; 2236 pcbddc->local_primal_size = total_counts; 2237 pcbddc->n_vertices = n_vertices; 2238 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2239 2240 /* Create constraint matrix */ 2241 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2242 /* so we need to set it up properly either with or without change of basis */ 2243 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2244 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2245 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2246 /* array to compute a local numbering of constraints : vertices first then constraints */ 2247 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2248 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2249 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2250 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2251 /* auxiliary stuff for basis change */ 2252 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2253 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2254 2255 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2256 total_primal_vertices=0; 2257 for (i=0;i<pcbddc->local_primal_size;i++) { 2258 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2259 if (size_of_constraint == 1) { 2260 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2261 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2262 aux_primal_minloc[total_primal_vertices]=0; 2263 total_primal_vertices++; 2264 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2265 PetscInt min_loc,min_index; 2266 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2267 /* find first untouched local node */ 2268 k = 0; 2269 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2270 min_index = global_indices[k]; 2271 min_loc = k; 2272 /* search the minimum among global nodes already untouched on the cc */ 2273 for (k=1;k<size_of_constraint;k++) { 2274 /* there can be more than one constraint on a single connected component */ 2275 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2276 min_index = global_indices[k]; 2277 min_loc = k; 2278 } 2279 } 2280 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2281 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2282 aux_primal_minloc[total_primal_vertices]=min_loc; 2283 total_primal_vertices++; 2284 } 2285 } 2286 /* determine if a QR strategy is needed for change of basis */ 2287 qr_needed = PETSC_FALSE; 2288 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2289 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2290 if (PetscBTLookup(change_basis,i)) { 2291 if (!pcbddc->use_qr_single) { 2292 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2293 j = 0; 2294 for (k=0;k<size_of_constraint;k++) { 2295 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2296 j++; 2297 } 2298 } 2299 /* found more than one primal dof on the cc */ 2300 if (j > 1) { 2301 PetscBTSet(qr_needed_idx,i); 2302 qr_needed = PETSC_TRUE; 2303 } 2304 } else { 2305 PetscBTSet(qr_needed_idx,i); 2306 qr_needed = PETSC_TRUE; 2307 } 2308 } 2309 } 2310 /* free workspace */ 2311 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2312 2313 /* permute indices in order to have a sorted set of vertices */ 2314 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2315 2316 /* nonzero structure of constraint matrix */ 2317 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2318 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2319 j=total_primal_vertices; 2320 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2321 if (!PetscBTLookup(change_basis,i)) { 2322 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2323 j++; 2324 } 2325 } 2326 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2327 ierr = PetscFree(nnz);CHKERRQ(ierr); 2328 /* set values in constraint matrix */ 2329 for (i=0;i<total_primal_vertices;i++) { 2330 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2331 } 2332 total_counts = total_primal_vertices; 2333 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2334 if (!PetscBTLookup(change_basis,i)) { 2335 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2336 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2337 total_counts++; 2338 } 2339 } 2340 /* assembling */ 2341 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 /* 2344 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2345 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2346 */ 2347 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2348 if (pcbddc->use_change_of_basis) { 2349 /* dual and primal dofs on a single cc */ 2350 PetscInt dual_dofs,primal_dofs; 2351 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2352 PetscInt primal_counter; 2353 /* working stuff for GEQRF */ 2354 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2355 PetscBLASInt lqr_work; 2356 /* working stuff for UNGQR */ 2357 PetscScalar *gqr_work,lgqr_work_t; 2358 PetscBLASInt lgqr_work; 2359 /* working stuff for TRTRS */ 2360 PetscScalar *trs_rhs; 2361 PetscBLASInt Blas_NRHS; 2362 /* pointers for values insertion into change of basis matrix */ 2363 PetscInt *start_rows,*start_cols; 2364 PetscScalar *start_vals; 2365 /* working stuff for values insertion */ 2366 PetscBT is_primal; 2367 /* matrix sizes */ 2368 PetscInt global_size,local_size; 2369 /* temporary change of basis */ 2370 Mat localChangeOfBasisMatrix; 2371 2372 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2373 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2374 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2375 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2376 /* nonzeros for local mat */ 2377 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2378 for (i=0;i<pcis->n;i++) nnz[i]=1; 2379 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2380 if (PetscBTLookup(change_basis,i)) { 2381 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2382 if (PetscBTLookup(qr_needed_idx,i)) { 2383 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2384 } else { 2385 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2386 /* get local primal index on the cc */ 2387 j = 0; 2388 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2389 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2390 } 2391 } 2392 } 2393 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2394 ierr = PetscFree(nnz);CHKERRQ(ierr); 2395 /* Set initial identity in the matrix */ 2396 for (i=0;i<pcis->n;i++) { 2397 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2398 } 2399 2400 if (pcbddc->dbg_flag) { 2401 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2403 } 2404 2405 2406 /* Now we loop on the constraints which need a change of basis */ 2407 /* 2408 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2409 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2410 2411 Basic blocks of change of basis matrix T computed by 2412 2413 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2414 2415 | 1 0 ... 0 s_1/S | 2416 | 0 1 ... 0 s_2/S | 2417 | ... | 2418 | 0 ... 1 s_{n-1}/S | 2419 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2420 2421 with S = \sum_{i=1}^n s_i^2 2422 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2423 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2424 2425 - QR decomposition of constraints otherwise 2426 */ 2427 if (qr_needed) { 2428 /* space to store Q */ 2429 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2430 /* first we issue queries for optimal work */ 2431 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2432 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2433 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2434 lqr_work = -1; 2435 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2436 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2437 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2438 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2439 lgqr_work = -1; 2440 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2441 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2442 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2443 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2444 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2445 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2446 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2447 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2448 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2449 /* array to store scaling factors for reflectors */ 2450 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2451 /* array to store rhs and solution of triangular solver */ 2452 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2453 /* allocating workspace for check */ 2454 if (pcbddc->dbg_flag) { 2455 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2456 } 2457 } 2458 /* array to store whether a node is primal or not */ 2459 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2460 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2461 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2462 if (i != total_primal_vertices) { 2463 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2464 } 2465 for (i=0;i<total_primal_vertices;i++) { 2466 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2467 } 2468 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2469 2470 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2471 /* -> using implicit ordering contained in temp_indices data */ 2472 total_counts = pcbddc->n_vertices; 2473 primal_counter = total_counts; 2474 while (total_counts<pcbddc->local_primal_size) { 2475 primal_dofs = 1; 2476 if (PetscBTLookup(change_basis,total_counts)) { 2477 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2478 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2479 primal_dofs++; 2480 } 2481 /* get constraint info */ 2482 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2483 dual_dofs = size_of_constraint-primal_dofs; 2484 2485 if (pcbddc->dbg_flag) { 2486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2487 } 2488 2489 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2490 2491 /* copy quadrature constraints for change of basis check */ 2492 if (pcbddc->dbg_flag) { 2493 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2494 } 2495 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2496 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2497 2498 /* compute QR decomposition of constraints */ 2499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2502 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2503 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2504 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2506 2507 /* explictly compute R^-T */ 2508 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2509 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2510 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2511 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2513 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2514 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2515 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2516 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2517 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2518 2519 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2520 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2521 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2522 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2524 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2525 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2526 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2527 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2528 2529 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2530 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2531 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2539 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2540 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2541 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2542 2543 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2544 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2545 /* insert cols for primal dofs */ 2546 for (j=0;j<primal_dofs;j++) { 2547 start_vals = &qr_basis[j*size_of_constraint]; 2548 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2549 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2550 } 2551 /* insert cols for dual dofs */ 2552 for (j=0,k=0;j<dual_dofs;k++) { 2553 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2554 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2555 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2556 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2557 j++; 2558 } 2559 } 2560 2561 /* check change of basis */ 2562 if (pcbddc->dbg_flag) { 2563 PetscInt ii,jj; 2564 PetscBool valid_qr=PETSC_TRUE; 2565 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2571 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2572 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2573 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2574 for (jj=0;jj<size_of_constraint;jj++) { 2575 for (ii=0;ii<primal_dofs;ii++) { 2576 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2577 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2578 } 2579 } 2580 if (!valid_qr) { 2581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2582 for (jj=0;jj<size_of_constraint;jj++) { 2583 for (ii=0;ii<primal_dofs;ii++) { 2584 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2585 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2586 } 2587 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2588 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2589 } 2590 } 2591 } 2592 } else { 2593 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2594 } 2595 } 2596 } else { /* simple transformation block */ 2597 PetscInt row,col; 2598 PetscScalar val,norm; 2599 2600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2601 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2602 for (j=0;j<size_of_constraint;j++) { 2603 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2604 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2605 if (!PetscBTLookup(is_primal,row_B)) { 2606 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2607 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2608 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2609 } else { 2610 for (k=0;k<size_of_constraint;k++) { 2611 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2612 if (row != col) { 2613 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2614 } else { 2615 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2616 } 2617 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2618 } 2619 } 2620 } 2621 if (pcbddc->dbg_flag) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2623 } 2624 } 2625 /* increment primal counter */ 2626 primal_counter += primal_dofs; 2627 } else { 2628 if (pcbddc->dbg_flag) { 2629 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2630 } 2631 } 2632 /* increment constraint counter total_counts */ 2633 total_counts += primal_dofs; 2634 } 2635 2636 /* free workspace */ 2637 if (qr_needed) { 2638 if (pcbddc->dbg_flag) { 2639 ierr = PetscFree(work);CHKERRQ(ierr); 2640 } 2641 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2642 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2643 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2644 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2645 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2646 } 2647 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2648 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2649 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2650 2651 /* assembling of global change of variable */ 2652 { 2653 Mat tmat; 2654 PetscInt bs; 2655 2656 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2657 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2658 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2659 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2660 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2661 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2662 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2663 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2664 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2665 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2666 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2667 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2668 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2669 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2670 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2671 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2672 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2673 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2674 } 2675 /* check */ 2676 if (pcbddc->dbg_flag) { 2677 PetscReal error; 2678 Vec x,x_change; 2679 2680 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2681 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2682 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2683 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2684 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2685 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2686 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2687 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2688 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2689 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2690 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2691 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2694 ierr = VecDestroy(&x);CHKERRQ(ierr); 2695 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2696 } 2697 2698 /* adapt sub_schurs computed (if any) */ 2699 if (pcbddc->use_deluxe_scaling) { 2700 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2701 if (sub_schurs->n_subs_par_g) { 2702 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2703 } 2704 if (sub_schurs->S_Ej_all) { 2705 Mat S_1,S_2,tmat; 2706 IS is_all_N; 2707 2708 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2709 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2710 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2711 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2712 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2713 sub_schurs->S_Ej_all = S_1; 2714 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2715 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2716 sub_schurs->sum_S_Ej_all = S_2; 2717 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2718 } 2719 } 2720 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2721 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2722 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2723 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2724 } 2725 2726 /* set up change of basis context */ 2727 if (pcbddc->ChangeOfBasisMatrix) { 2728 PCBDDCChange_ctx change_ctx; 2729 2730 if (!pcbddc->new_global_mat) { 2731 PetscInt global_size,local_size; 2732 2733 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2734 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2735 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2736 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2737 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2738 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2739 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2740 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2741 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2742 } else { 2743 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2744 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2745 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2746 } 2747 if (!pcbddc->user_ChangeOfBasisMatrix) { 2748 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2749 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2750 } else { 2751 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2752 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2753 } 2754 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2755 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2756 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2757 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2758 } 2759 2760 /* get indices in local ordering for vertices and constraints */ 2761 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2762 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2763 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2764 } 2765 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2766 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2767 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2768 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2769 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2770 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2771 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2772 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2773 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2774 /* set quantities in PCBDDC data struct */ 2775 pcbddc->n_actual_vertices = i; 2776 /* check if a new primal space has been introduced */ 2777 pcbddc->new_primal_space_local = PETSC_TRUE; 2778 if (olocal_primal_size == pcbddc->local_primal_size) { 2779 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2780 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2781 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2782 } 2783 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2784 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2785 2786 /* flush dbg viewer */ 2787 if (pcbddc->dbg_flag) { 2788 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2789 } 2790 2791 /* free workspace */ 2792 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2793 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2794 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2795 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2796 if (!pcbddc->adaptive_selection) { 2797 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2798 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2799 } else { 2800 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2801 pcbddc->adaptive_constraints_ptrs, 2802 pcbddc->adaptive_constraints_idxs, 2803 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2804 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2805 } 2806 PetscFunctionReturn(0); 2807 } 2808 2809 #undef __FUNCT__ 2810 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2811 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2812 { 2813 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2814 PC_IS *pcis = (PC_IS*)pc->data; 2815 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2816 PetscInt ierr,i,vertex_size; 2817 PetscViewer viewer=pcbddc->dbg_viewer; 2818 2819 PetscFunctionBegin; 2820 /* Reset previously computed graph */ 2821 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2822 /* Init local Graph struct */ 2823 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2824 2825 /* Check validity of the csr graph passed in by the user */ 2826 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2827 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2828 } 2829 2830 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2831 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2832 Mat mat_adj; 2833 PetscInt *xadj,*adjncy; 2834 PetscInt nvtxs; 2835 PetscBool flg_row=PETSC_TRUE; 2836 2837 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2838 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2839 if (!flg_row) { 2840 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2841 } 2842 if (pcbddc->use_local_adj) { 2843 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2844 pcbddc->computed_rowadj = PETSC_TRUE; 2845 } else { /* just compute subdomain's connected components */ 2846 IS is_dummy; 2847 ISLocalToGlobalMapping l2gmap_dummy; 2848 PetscInt j,sum; 2849 PetscInt *cxadj,*cadjncy; 2850 const PetscInt *idxs; 2851 PCBDDCGraph graph; 2852 PetscBT is_on_boundary; 2853 2854 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2855 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2856 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2857 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2858 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2859 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2860 graph->xadj = xadj; 2861 graph->adjncy = adjncy; 2862 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2863 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2864 2865 if (pcbddc->dbg_flag) { 2866 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2867 for (i=0;i<graph->ncc;i++) { 2868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2869 } 2870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2871 } 2872 2873 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2874 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2875 for (i=0;i<pcis->n_B;i++) { 2876 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2877 } 2878 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2879 2880 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2881 sum = 0; 2882 for (i=0;i<graph->ncc;i++) { 2883 PetscInt sizecc = 0; 2884 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2885 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2886 sizecc++; 2887 } 2888 } 2889 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2890 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2891 cxadj[graph->queue[j]] = sizecc; 2892 } 2893 } 2894 sum += sizecc*sizecc; 2895 } 2896 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2897 sum = 0; 2898 for (i=0;i<nvtxs;i++) { 2899 PetscInt temp = cxadj[i]; 2900 cxadj[i] = sum; 2901 sum += temp; 2902 } 2903 cxadj[nvtxs] = sum; 2904 for (i=0;i<graph->ncc;i++) { 2905 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2906 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2907 PetscInt k,sizecc = 0; 2908 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2909 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2910 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2911 sizecc++; 2912 } 2913 } 2914 } 2915 } 2916 } 2917 if (nvtxs) { 2918 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2919 } else { 2920 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2921 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2922 } 2923 graph->xadj = 0; 2924 graph->adjncy = 0; 2925 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2926 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2927 } 2928 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2929 if (!flg_row) { 2930 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2931 } 2932 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2933 } 2934 2935 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2936 vertex_size = 1; 2937 if (pcbddc->user_provided_isfordofs) { 2938 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2939 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2940 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2941 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2942 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2943 } 2944 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2945 pcbddc->n_ISForDofs = 0; 2946 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2947 } 2948 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2949 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2950 } else { 2951 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2952 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2953 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2954 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2955 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2956 } 2957 } 2958 } 2959 2960 /* Setup of Graph */ 2961 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2962 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2963 } 2964 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2965 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2966 } 2967 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2968 2969 /* Graph's connected components analysis */ 2970 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2971 2972 /* print some info to stdout */ 2973 if (pcbddc->dbg_flag) { 2974 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2975 } 2976 2977 /* mark topography has done */ 2978 pcbddc->recompute_topography = PETSC_FALSE; 2979 PetscFunctionReturn(0); 2980 } 2981 2982 #undef __FUNCT__ 2983 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2984 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2985 { 2986 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2987 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2988 PetscErrorCode ierr; 2989 2990 PetscFunctionBegin; 2991 n = 0; 2992 vertices = 0; 2993 if (pcbddc->ConstraintMatrix) { 2994 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2995 for (i=0;i<local_primal_size;i++) { 2996 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2997 if (size_of_constraint == 1) n++; 2998 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2999 } 3000 if (vertices_idx) { 3001 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3002 n = 0; 3003 for (i=0;i<local_primal_size;i++) { 3004 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3005 if (size_of_constraint == 1) { 3006 vertices[n++]=row_cmat_indices[0]; 3007 } 3008 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3009 } 3010 } 3011 } 3012 *n_vertices = n; 3013 if (vertices_idx) *vertices_idx = vertices; 3014 PetscFunctionReturn(0); 3015 } 3016 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3019 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3020 { 3021 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3022 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3023 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3024 PetscBT touched; 3025 PetscErrorCode ierr; 3026 3027 /* This function assumes that the number of local constraints per connected component 3028 is not greater than the number of nodes defined for the connected component 3029 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3030 PetscFunctionBegin; 3031 n = 0; 3032 constraints_index = 0; 3033 if (pcbddc->ConstraintMatrix) { 3034 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3035 max_size_of_constraint = 0; 3036 for (i=0;i<local_primal_size;i++) { 3037 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3038 if (size_of_constraint > 1) { 3039 n++; 3040 } 3041 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3042 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3043 } 3044 if (constraints_idx) { 3045 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3046 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3047 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3048 n = 0; 3049 for (i=0;i<local_primal_size;i++) { 3050 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3051 if (size_of_constraint > 1) { 3052 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3053 /* find first untouched local node */ 3054 j = 0; 3055 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3056 min_index = row_cmat_global_indices[j]; 3057 min_loc = j; 3058 /* search the minimum among nodes not yet touched on the connected component 3059 since there can be more than one constraint on a single cc */ 3060 for (j=1;j<size_of_constraint;j++) { 3061 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3062 min_index = row_cmat_global_indices[j]; 3063 min_loc = j; 3064 } 3065 } 3066 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3067 constraints_index[n++] = row_cmat_indices[min_loc]; 3068 } 3069 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3070 } 3071 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3072 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3073 } 3074 } 3075 *n_constraints = n; 3076 if (constraints_idx) *constraints_idx = constraints_index; 3077 PetscFunctionReturn(0); 3078 } 3079 3080 #undef __FUNCT__ 3081 #define __FUNCT__ "PCBDDCSubsetNumbering" 3082 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3083 { 3084 Vec local_vec,global_vec; 3085 IS seqis,paris; 3086 VecScatter scatter_ctx; 3087 PetscScalar *array; 3088 PetscInt *temp_global_dofs; 3089 PetscScalar globalsum; 3090 PetscInt i,j,s; 3091 PetscInt nlocals,first_index,old_index,max_local; 3092 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3093 PetscMPIInt *dof_sizes,*dof_displs; 3094 PetscBool first_found; 3095 PetscErrorCode ierr; 3096 3097 PetscFunctionBegin; 3098 /* mpi buffers */ 3099 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3100 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3101 j = ( !rank_prec_comm ? size_prec_comm : 0); 3102 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3104 /* get maximum size of subset */ 3105 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3106 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3107 max_local = 0; 3108 for (i=0;i<n_local_dofs;i++) { 3109 if (max_local < temp_global_dofs[i] ) { 3110 max_local = temp_global_dofs[i]; 3111 } 3112 } 3113 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3114 max_global++; 3115 max_local = 0; 3116 for (i=0;i<n_local_dofs;i++) { 3117 if (max_local < local_dofs[i] ) { 3118 max_local = local_dofs[i]; 3119 } 3120 } 3121 max_local++; 3122 /* allocate workspace */ 3123 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3124 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3125 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3126 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3127 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3128 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3129 /* create scatter */ 3130 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3131 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3132 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3133 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3134 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3135 /* init array */ 3136 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3137 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3138 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3139 if (local_dofs_mult) { 3140 for (i=0;i<n_local_dofs;i++) { 3141 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3142 } 3143 } else { 3144 for (i=0;i<n_local_dofs;i++) { 3145 array[local_dofs[i]]=1.0; 3146 } 3147 } 3148 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3149 /* scatter into global vec and get total number of global dofs */ 3150 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3151 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3152 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3153 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3154 /* Fill global_vec with cumulative function for global numbering */ 3155 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3156 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3157 nlocals = 0; 3158 first_index = -1; 3159 first_found = PETSC_FALSE; 3160 for (i=0;i<s;i++) { 3161 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3162 first_found = PETSC_TRUE; 3163 first_index = i; 3164 } 3165 nlocals += (PetscInt)PetscRealPart(array[i]); 3166 } 3167 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3168 if (!rank_prec_comm) { 3169 dof_displs[0]=0; 3170 for (i=1;i<size_prec_comm;i++) { 3171 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3172 } 3173 } 3174 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3175 if (first_found) { 3176 array[first_index] += (PetscScalar)nlocals; 3177 old_index = first_index; 3178 for (i=first_index+1;i<s;i++) { 3179 if (PetscRealPart(array[i]) > 0.1) { 3180 array[i] += array[old_index]; 3181 old_index = i; 3182 } 3183 } 3184 } 3185 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3186 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3187 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3188 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3189 /* get global ordering of local dofs */ 3190 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3191 if (local_dofs_mult) { 3192 for (i=0;i<n_local_dofs;i++) { 3193 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3194 } 3195 } else { 3196 for (i=0;i<n_local_dofs;i++) { 3197 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3198 } 3199 } 3200 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3201 /* free workspace */ 3202 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3203 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3204 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3205 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3206 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3207 /* return pointer to global ordering of local dofs */ 3208 *global_numbering_subset = temp_global_dofs; 3209 PetscFunctionReturn(0); 3210 } 3211 3212 #undef __FUNCT__ 3213 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3214 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3215 { 3216 PetscInt i,j; 3217 PetscScalar *alphas; 3218 PetscErrorCode ierr; 3219 3220 PetscFunctionBegin; 3221 /* this implements stabilized Gram-Schmidt */ 3222 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3223 for (i=0;i<n;i++) { 3224 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3225 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3226 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3227 } 3228 ierr = PetscFree(alphas);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatISGetSubassemblingPattern" 3234 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3235 { 3236 Mat subdomain_adj; 3237 IS new_ranks,ranks_send_to; 3238 MatPartitioning partitioner; 3239 Mat_IS *matis; 3240 PetscInt n_neighs,*neighs,*n_shared,**shared; 3241 PetscInt prank; 3242 PetscMPIInt size,rank,color; 3243 PetscInt *xadj,*adjncy,*oldranks; 3244 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3245 PetscInt i,local_size,threshold=0; 3246 PetscErrorCode ierr; 3247 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3248 PetscSubcomm subcomm; 3249 3250 PetscFunctionBegin; 3251 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3252 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3253 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3254 3255 /* Get info on mapping */ 3256 matis = (Mat_IS*)(mat->data); 3257 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3258 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3259 3260 /* build local CSR graph of subdomains' connectivity */ 3261 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3262 xadj[0] = 0; 3263 xadj[1] = PetscMax(n_neighs-1,0); 3264 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3265 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3266 3267 if (threshold) { 3268 PetscInt xadj_count = 0; 3269 for (i=1;i<n_neighs;i++) { 3270 if (n_shared[i] > threshold) { 3271 adjncy[xadj_count] = neighs[i]; 3272 adjncy_wgt[xadj_count] = n_shared[i]; 3273 xadj_count++; 3274 } 3275 } 3276 xadj[1] = xadj_count; 3277 } else { 3278 if (xadj[1]) { 3279 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3280 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3281 } 3282 } 3283 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3284 if (use_square) { 3285 for (i=0;i<xadj[1];i++) { 3286 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3287 } 3288 } 3289 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3290 3291 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3292 3293 /* 3294 Restrict work on active processes only. 3295 */ 3296 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3297 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3298 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3299 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3300 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3301 if (color) { 3302 ierr = PetscFree(xadj);CHKERRQ(ierr); 3303 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3304 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3305 } else { 3306 PetscInt coarsening_ratio; 3307 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3308 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3309 prank = rank; 3310 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3311 /* 3312 for (i=0;i<size;i++) { 3313 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3314 } 3315 */ 3316 for (i=0;i<xadj[1];i++) { 3317 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3318 } 3319 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3320 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3321 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3322 3323 /* Partition */ 3324 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3325 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3326 if (use_vwgt) { 3327 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3328 v_wgt[0] = local_size; 3329 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3330 } 3331 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3332 coarsening_ratio = size/n_subdomains; 3333 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3334 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3335 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3336 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3337 3338 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3339 if (contiguous) { 3340 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3341 } else { 3342 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3343 } 3344 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3345 /* clean up */ 3346 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3347 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3348 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3349 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3350 } 3351 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3352 3353 /* assemble parallel IS for sends */ 3354 i = 1; 3355 if (color) i=0; 3356 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3357 3358 /* get back IS */ 3359 *is_sends = ranks_send_to; 3360 PetscFunctionReturn(0); 3361 } 3362 3363 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3364 3365 #undef __FUNCT__ 3366 #define __FUNCT__ "MatISSubassemble" 3367 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3368 { 3369 Mat local_mat; 3370 Mat_IS *matis; 3371 IS is_sends_internal; 3372 PetscInt rows,cols,new_local_rows; 3373 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3374 PetscBool ismatis,isdense,newisdense,destroy_mat; 3375 ISLocalToGlobalMapping l2gmap; 3376 PetscInt* l2gmap_indices; 3377 const PetscInt* is_indices; 3378 MatType new_local_type; 3379 /* buffers */ 3380 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3381 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3382 PetscInt *recv_buffer_idxs_local; 3383 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3384 /* MPI */ 3385 MPI_Comm comm,comm_n; 3386 PetscSubcomm subcomm; 3387 PetscMPIInt n_sends,n_recvs,commsize; 3388 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3389 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3390 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3391 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3392 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3393 PetscErrorCode ierr; 3394 3395 PetscFunctionBegin; 3396 /* TODO: add missing checks */ 3397 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3398 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3399 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3400 PetscValidLogicalCollectiveInt(mat,nis,7); 3401 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3402 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3403 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3404 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3405 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3406 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3407 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3408 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3409 PetscInt mrows,mcols,mnrows,mncols; 3410 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3411 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3412 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3413 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3414 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3415 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3416 } 3417 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3418 PetscValidLogicalCollectiveInt(mat,bs,0); 3419 /* prepare IS for sending if not provided */ 3420 if (!is_sends) { 3421 PetscBool pcontig = PETSC_TRUE; 3422 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3423 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3424 } else { 3425 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3426 is_sends_internal = is_sends; 3427 } 3428 3429 /* get pointer of MATIS data */ 3430 matis = (Mat_IS*)mat->data; 3431 3432 /* get comm */ 3433 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3434 3435 /* compute number of sends */ 3436 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3437 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3438 3439 /* compute number of receives */ 3440 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3441 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3442 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3443 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3444 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3445 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3446 ierr = PetscFree(iflags);CHKERRQ(ierr); 3447 3448 /* restrict comm if requested */ 3449 subcomm = 0; 3450 destroy_mat = PETSC_FALSE; 3451 if (restrict_comm) { 3452 PetscMPIInt color,subcommsize; 3453 3454 color = 0; 3455 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3456 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3457 subcommsize = commsize - subcommsize; 3458 /* check if reuse has been requested */ 3459 if (reuse == MAT_REUSE_MATRIX) { 3460 if (*mat_n) { 3461 PetscMPIInt subcommsize2; 3462 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3463 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3464 comm_n = PetscObjectComm((PetscObject)*mat_n); 3465 } else { 3466 comm_n = PETSC_COMM_SELF; 3467 } 3468 } else { /* MAT_INITIAL_MATRIX */ 3469 PetscMPIInt rank; 3470 3471 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3472 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3473 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3474 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3475 comm_n = subcomm->comm; 3476 } 3477 /* flag to destroy *mat_n if not significative */ 3478 if (color) destroy_mat = PETSC_TRUE; 3479 } else { 3480 comm_n = comm; 3481 } 3482 3483 /* prepare send/receive buffers */ 3484 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3485 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3486 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3487 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3488 if (nis) { 3489 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3490 } 3491 3492 /* Get data from local matrices */ 3493 if (!isdense) { 3494 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3495 /* TODO: See below some guidelines on how to prepare the local buffers */ 3496 /* 3497 send_buffer_vals should contain the raw values of the local matrix 3498 send_buffer_idxs should contain: 3499 - MatType_PRIVATE type 3500 - PetscInt size_of_l2gmap 3501 - PetscInt global_row_indices[size_of_l2gmap] 3502 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3503 */ 3504 } else { 3505 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3506 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3507 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3508 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3509 send_buffer_idxs[1] = i; 3510 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3511 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3512 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3513 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3514 for (i=0;i<n_sends;i++) { 3515 ilengths_vals[is_indices[i]] = len*len; 3516 ilengths_idxs[is_indices[i]] = len+2; 3517 } 3518 } 3519 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3520 /* additional is (if any) */ 3521 if (nis) { 3522 PetscMPIInt psum; 3523 PetscInt j; 3524 for (j=0,psum=0;j<nis;j++) { 3525 PetscInt plen; 3526 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3527 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3528 psum += len+1; /* indices + lenght */ 3529 } 3530 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3531 for (j=0,psum=0;j<nis;j++) { 3532 PetscInt plen; 3533 const PetscInt *is_array_idxs; 3534 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3535 send_buffer_idxs_is[psum] = plen; 3536 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3537 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3538 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3539 psum += plen+1; /* indices + lenght */ 3540 } 3541 for (i=0;i<n_sends;i++) { 3542 ilengths_idxs_is[is_indices[i]] = psum; 3543 } 3544 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3545 } 3546 3547 buf_size_idxs = 0; 3548 buf_size_vals = 0; 3549 buf_size_idxs_is = 0; 3550 for (i=0;i<n_recvs;i++) { 3551 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3552 buf_size_vals += (PetscInt)olengths_vals[i]; 3553 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3554 } 3555 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3556 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3557 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3558 3559 /* get new tags for clean communications */ 3560 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3561 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3562 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3563 3564 /* allocate for requests */ 3565 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3566 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3567 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3568 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3569 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3570 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3571 3572 /* communications */ 3573 ptr_idxs = recv_buffer_idxs; 3574 ptr_vals = recv_buffer_vals; 3575 ptr_idxs_is = recv_buffer_idxs_is; 3576 for (i=0;i<n_recvs;i++) { 3577 source_dest = onodes[i]; 3578 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3579 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3580 ptr_idxs += olengths_idxs[i]; 3581 ptr_vals += olengths_vals[i]; 3582 if (nis) { 3583 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3584 ptr_idxs_is += olengths_idxs_is[i]; 3585 } 3586 } 3587 for (i=0;i<n_sends;i++) { 3588 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3589 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3590 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3591 if (nis) { 3592 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3593 } 3594 } 3595 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3596 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3597 3598 /* assemble new l2g map */ 3599 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3600 ptr_idxs = recv_buffer_idxs; 3601 new_local_rows = 0; 3602 for (i=0;i<n_recvs;i++) { 3603 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3604 ptr_idxs += olengths_idxs[i]; 3605 } 3606 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3607 ptr_idxs = recv_buffer_idxs; 3608 new_local_rows = 0; 3609 for (i=0;i<n_recvs;i++) { 3610 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3611 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3612 ptr_idxs += olengths_idxs[i]; 3613 } 3614 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3615 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3616 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3617 3618 /* infer new local matrix type from received local matrices type */ 3619 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3620 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3621 if (n_recvs) { 3622 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3623 ptr_idxs = recv_buffer_idxs; 3624 for (i=0;i<n_recvs;i++) { 3625 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3626 new_local_type_private = MATAIJ_PRIVATE; 3627 break; 3628 } 3629 ptr_idxs += olengths_idxs[i]; 3630 } 3631 switch (new_local_type_private) { 3632 case MATDENSE_PRIVATE: 3633 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3634 new_local_type = MATSEQAIJ; 3635 bs = 1; 3636 } else { /* if I receive only 1 dense matrix */ 3637 new_local_type = MATSEQDENSE; 3638 bs = 1; 3639 } 3640 break; 3641 case MATAIJ_PRIVATE: 3642 new_local_type = MATSEQAIJ; 3643 bs = 1; 3644 break; 3645 case MATBAIJ_PRIVATE: 3646 new_local_type = MATSEQBAIJ; 3647 break; 3648 case MATSBAIJ_PRIVATE: 3649 new_local_type = MATSEQSBAIJ; 3650 break; 3651 default: 3652 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3653 break; 3654 } 3655 } else { /* by default, new_local_type is seqdense */ 3656 new_local_type = MATSEQDENSE; 3657 bs = 1; 3658 } 3659 3660 /* create MATIS object if needed */ 3661 if (reuse == MAT_INITIAL_MATRIX) { 3662 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3663 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3664 } else { 3665 /* it also destroys the local matrices */ 3666 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3667 } 3668 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3669 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3670 3671 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3672 3673 /* Global to local map of received indices */ 3674 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3675 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3676 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3677 3678 /* restore attributes -> type of incoming data and its size */ 3679 buf_size_idxs = 0; 3680 for (i=0;i<n_recvs;i++) { 3681 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3682 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3683 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3684 } 3685 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3686 3687 /* set preallocation */ 3688 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3689 if (!newisdense) { 3690 PetscInt *new_local_nnz=0; 3691 3692 ptr_vals = recv_buffer_vals; 3693 ptr_idxs = recv_buffer_idxs_local; 3694 if (n_recvs) { 3695 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3696 } 3697 for (i=0;i<n_recvs;i++) { 3698 PetscInt j; 3699 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3700 for (j=0;j<*(ptr_idxs+1);j++) { 3701 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3702 } 3703 } else { 3704 /* TODO */ 3705 } 3706 ptr_idxs += olengths_idxs[i]; 3707 } 3708 if (new_local_nnz) { 3709 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3710 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3711 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3712 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3713 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3714 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3715 } else { 3716 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3719 } else { 3720 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3721 } 3722 3723 /* set values */ 3724 ptr_vals = recv_buffer_vals; 3725 ptr_idxs = recv_buffer_idxs_local; 3726 for (i=0;i<n_recvs;i++) { 3727 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3728 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3729 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3730 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3731 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3732 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3733 } else { 3734 /* TODO */ 3735 } 3736 ptr_idxs += olengths_idxs[i]; 3737 ptr_vals += olengths_vals[i]; 3738 } 3739 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3741 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3742 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3743 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3744 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3745 3746 #if 0 3747 if (!restrict_comm) { /* check */ 3748 Vec lvec,rvec; 3749 PetscReal infty_error; 3750 3751 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3752 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3753 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3754 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3755 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3756 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3757 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3758 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3759 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3760 } 3761 #endif 3762 3763 /* assemble new additional is (if any) */ 3764 if (nis) { 3765 PetscInt **temp_idxs,*count_is,j,psum; 3766 3767 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3768 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3769 ptr_idxs = recv_buffer_idxs_is; 3770 psum = 0; 3771 for (i=0;i<n_recvs;i++) { 3772 for (j=0;j<nis;j++) { 3773 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3774 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3775 psum += plen; 3776 ptr_idxs += plen+1; /* shift pointer to received data */ 3777 } 3778 } 3779 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3780 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3781 for (i=1;i<nis;i++) { 3782 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3783 } 3784 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3785 ptr_idxs = recv_buffer_idxs_is; 3786 for (i=0;i<n_recvs;i++) { 3787 for (j=0;j<nis;j++) { 3788 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3789 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3790 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3791 ptr_idxs += plen+1; /* shift pointer to received data */ 3792 } 3793 } 3794 for (i=0;i<nis;i++) { 3795 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3796 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3797 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3798 } 3799 ierr = PetscFree(count_is);CHKERRQ(ierr); 3800 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3801 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3802 } 3803 /* free workspace */ 3804 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3805 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3806 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3807 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3808 if (isdense) { 3809 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3810 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3811 } else { 3812 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3813 } 3814 if (nis) { 3815 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3816 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3817 } 3818 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3819 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3820 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3821 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3822 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3823 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3824 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3825 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3826 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3827 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3828 ierr = PetscFree(onodes);CHKERRQ(ierr); 3829 if (nis) { 3830 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3831 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3832 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3833 } 3834 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3835 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3836 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3837 for (i=0;i<nis;i++) { 3838 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3839 } 3840 } 3841 PetscFunctionReturn(0); 3842 } 3843 3844 /* temporary hack into ksp private data structure */ 3845 #include <petsc-private/kspimpl.h> 3846 3847 #undef __FUNCT__ 3848 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3849 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3850 { 3851 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3852 PC_IS *pcis = (PC_IS*)pc->data; 3853 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3854 MatNullSpace CoarseNullSpace=NULL; 3855 ISLocalToGlobalMapping coarse_islg; 3856 IS coarse_is,*isarray; 3857 PetscInt i,im_active=-1,active_procs=-1; 3858 PetscInt nis,nisdofs,nisneu; 3859 PC pc_temp; 3860 PCType coarse_pc_type; 3861 KSPType coarse_ksp_type; 3862 PetscBool multilevel_requested,multilevel_allowed; 3863 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3864 Mat t_coarse_mat_is; 3865 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3866 PetscMPIInt all_procs; 3867 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3868 PetscBool compute_vecs = PETSC_FALSE; 3869 PetscScalar *array; 3870 PetscErrorCode ierr; 3871 3872 PetscFunctionBegin; 3873 /* Assign global numbering to coarse dofs */ 3874 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3875 compute_vecs = PETSC_TRUE; 3876 PetscInt ocoarse_size; 3877 ocoarse_size = pcbddc->coarse_size; 3878 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3879 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3880 /* see if we can avoid some work */ 3881 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3882 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3883 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3884 coarse_reuse = PETSC_FALSE; 3885 } else { /* we can safely reuse already computed coarse matrix */ 3886 coarse_reuse = PETSC_TRUE; 3887 } 3888 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3889 coarse_reuse = PETSC_FALSE; 3890 } 3891 /* reset any subassembling information */ 3892 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3893 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3894 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3895 coarse_reuse = PETSC_TRUE; 3896 } 3897 3898 /* count "active" (i.e. with positive local size) and "void" processes */ 3899 im_active = !!(pcis->n); 3900 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3901 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3902 void_procs = all_procs-active_procs; 3903 csin_type_simple = PETSC_TRUE; 3904 redist = PETSC_FALSE; 3905 if (pcbddc->current_level && void_procs) { 3906 csin_ml = PETSC_TRUE; 3907 ncoarse_ml = void_procs; 3908 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3909 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3910 csin_ds = PETSC_TRUE; 3911 ncoarse_ds = pcbddc->redistribute_coarse; 3912 redist = PETSC_TRUE; 3913 } else { 3914 csin_ds = PETSC_TRUE; 3915 ncoarse_ds = active_procs; 3916 redist = PETSC_TRUE; 3917 } 3918 } else { 3919 csin_ml = PETSC_FALSE; 3920 ncoarse_ml = all_procs; 3921 if (void_procs) { 3922 csin_ds = PETSC_TRUE; 3923 ncoarse_ds = void_procs; 3924 csin_type_simple = PETSC_FALSE; 3925 } else { 3926 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3927 csin_ds = PETSC_TRUE; 3928 ncoarse_ds = pcbddc->redistribute_coarse; 3929 redist = PETSC_TRUE; 3930 } else { 3931 csin_ds = PETSC_FALSE; 3932 ncoarse_ds = all_procs; 3933 } 3934 } 3935 } 3936 3937 /* 3938 test if we can go multilevel: three conditions must be satisfied: 3939 - we have not exceeded the number of levels requested 3940 - we can actually subassemble the active processes 3941 - we can find a suitable number of MPI processes where we can place the subassembled problem 3942 */ 3943 multilevel_allowed = PETSC_FALSE; 3944 multilevel_requested = PETSC_FALSE; 3945 if (pcbddc->current_level < pcbddc->max_levels) { 3946 multilevel_requested = PETSC_TRUE; 3947 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3948 multilevel_allowed = PETSC_FALSE; 3949 } else { 3950 multilevel_allowed = PETSC_TRUE; 3951 } 3952 } 3953 /* determine number of process partecipating to coarse solver */ 3954 if (multilevel_allowed) { 3955 ncoarse = ncoarse_ml; 3956 csin = csin_ml; 3957 } else { 3958 ncoarse = ncoarse_ds; 3959 csin = csin_ds; 3960 } 3961 3962 /* creates temporary l2gmap and IS for coarse indexes */ 3963 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3964 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3965 3966 /* creates temporary MATIS object for coarse matrix */ 3967 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3968 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3969 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3970 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3971 #if 0 3972 { 3973 PetscViewer viewer; 3974 char filename[256]; 3975 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3976 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3977 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3978 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3979 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3980 } 3981 #endif 3982 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3983 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3984 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3985 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3986 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3987 3988 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3989 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3990 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3991 const PetscInt *idxs; 3992 ISLocalToGlobalMapping tmap; 3993 3994 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3995 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3996 /* allocate space for temporary storage */ 3997 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3998 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3999 /* allocate for IS array */ 4000 nisdofs = pcbddc->n_ISForDofsLocal; 4001 nisneu = !!pcbddc->NeumannBoundariesLocal; 4002 nis = nisdofs + nisneu; 4003 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4004 /* dofs splitting */ 4005 for (i=0;i<nisdofs;i++) { 4006 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4007 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4008 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4009 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4010 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4011 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4012 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4013 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4014 } 4015 /* neumann boundaries */ 4016 if (pcbddc->NeumannBoundariesLocal) { 4017 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4018 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4019 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4020 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4021 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4022 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4023 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4024 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4025 } 4026 /* free memory */ 4027 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4028 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4029 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4030 } else { 4031 nis = 0; 4032 nisdofs = 0; 4033 nisneu = 0; 4034 isarray = NULL; 4035 } 4036 /* destroy no longer needed map */ 4037 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4038 4039 /* restrict on coarse candidates (if needed) */ 4040 coarse_mat_is = NULL; 4041 if (csin) { 4042 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4043 if (redist) { 4044 PetscMPIInt rank; 4045 PetscInt spc,n_spc_p1,dest[1],destsize; 4046 4047 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4048 spc = active_procs/pcbddc->redistribute_coarse; 4049 n_spc_p1 = active_procs%pcbddc->redistribute_coarse; 4050 if (im_active) { 4051 destsize = 1; 4052 if (rank > n_spc_p1*(spc+1)-1) { 4053 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4054 } else { 4055 dest[0] = rank/(spc+1); 4056 } 4057 } else { 4058 destsize = 0; 4059 } 4060 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4061 } else if (csin_type_simple) { 4062 PetscMPIInt rank; 4063 PetscInt issize,isidx; 4064 4065 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4066 if (im_active) { 4067 issize = 1; 4068 isidx = (PetscInt)rank; 4069 } else { 4070 issize = 0; 4071 isidx = -1; 4072 } 4073 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4074 } else { /* get a suitable subassembling pattern from MATIS code */ 4075 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4076 } 4077 4078 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4079 if (!redist || ncoarse <= void_procs) { 4080 PetscInt ncoarse_cand,tissize,*nisindices; 4081 PetscInt *coarse_candidates; 4082 const PetscInt* tisindices; 4083 4084 /* get coarse candidates' ranks in pc communicator */ 4085 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4086 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4087 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4088 if (!coarse_candidates[i]) { 4089 coarse_candidates[ncoarse_cand++]=i; 4090 } 4091 } 4092 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4093 4094 4095 if (pcbddc->dbg_flag) { 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4098 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4100 for (i=0;i<ncoarse_cand;i++) { 4101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4102 } 4103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4104 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4105 } 4106 /* shift the pattern on coarse candidates */ 4107 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4108 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4109 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4110 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4111 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4112 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4113 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4114 } 4115 if (pcbddc->dbg_flag) { 4116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4118 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4119 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4120 } 4121 } 4122 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4123 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4124 } else { 4125 if (pcbddc->dbg_flag) { 4126 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4128 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4129 } 4130 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4131 coarse_mat_is = t_coarse_mat_is; 4132 } 4133 4134 /* create local to global scatters for coarse problem */ 4135 if (compute_vecs) { 4136 PetscInt lrows; 4137 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4138 if (coarse_mat_is) { 4139 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4140 } else { 4141 lrows = 0; 4142 } 4143 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4144 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4145 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4146 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4147 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4148 } 4149 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4150 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4151 4152 /* set defaults for coarse KSP and PC */ 4153 if (multilevel_allowed) { 4154 coarse_ksp_type = KSPRICHARDSON; 4155 coarse_pc_type = PCBDDC; 4156 } else { 4157 coarse_ksp_type = KSPPREONLY; 4158 coarse_pc_type = PCREDUNDANT; 4159 } 4160 4161 /* print some info if requested */ 4162 if (pcbddc->dbg_flag) { 4163 if (!multilevel_allowed) { 4164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4165 if (multilevel_requested) { 4166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4167 } else if (pcbddc->max_levels) { 4168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4169 } 4170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4171 } 4172 } 4173 4174 /* create the coarse KSP object only once with defaults */ 4175 if (coarse_mat_is) { 4176 MatReuse coarse_mat_reuse; 4177 PetscViewer dbg_viewer = NULL; 4178 if (pcbddc->dbg_flag) { 4179 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4180 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4181 } 4182 if (!pcbddc->coarse_ksp) { 4183 char prefix[256],str_level[16]; 4184 size_t len; 4185 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4186 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4187 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4188 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4189 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4190 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4191 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4192 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4193 /* prefix */ 4194 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4195 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4196 if (!pcbddc->current_level) { 4197 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4198 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4199 } else { 4200 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4201 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4202 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4203 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4204 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4205 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4206 } 4207 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4208 /* allow user customization */ 4209 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4210 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4211 } 4212 4213 /* get some info after set from options */ 4214 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4215 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4216 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4217 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4218 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4219 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4220 isbddc = PETSC_FALSE; 4221 } 4222 if (isredundant) { 4223 KSP inner_ksp; 4224 PC inner_pc; 4225 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4226 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4227 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4228 } 4229 4230 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4231 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4232 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4233 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4234 if (nisdofs) { 4235 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4236 for (i=0;i<nisdofs;i++) { 4237 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4238 } 4239 } 4240 if (nisneu) { 4241 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4242 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4243 } 4244 4245 /* assemble coarse matrix */ 4246 if (coarse_reuse) { 4247 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4248 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4249 coarse_mat_reuse = MAT_REUSE_MATRIX; 4250 } else { 4251 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4252 } 4253 if (isbddc || isnn) { 4254 if (pcbddc->coarsening_ratio > 1) { 4255 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4256 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4257 if (pcbddc->dbg_flag) { 4258 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4259 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4260 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4261 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4262 } 4263 } 4264 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4265 } else { 4266 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4267 coarse_mat = coarse_mat_is; 4268 } 4269 } else { 4270 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4271 } 4272 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4273 4274 /* propagate symmetry info to coarse matrix */ 4275 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4276 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4277 4278 /* set operators */ 4279 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4280 if (pcbddc->dbg_flag) { 4281 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4282 } 4283 } else { /* processes non partecipating to coarse solver (if any) */ 4284 coarse_mat = 0; 4285 } 4286 ierr = PetscFree(isarray);CHKERRQ(ierr); 4287 #if 0 4288 { 4289 PetscViewer viewer; 4290 char filename[256]; 4291 sprintf(filename,"coarse_mat.m"); 4292 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4293 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4294 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4295 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4296 } 4297 #endif 4298 4299 /* Compute coarse null space (special handling by BDDC only) */ 4300 if (pcbddc->NullSpace) { 4301 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4302 } 4303 4304 if (pcbddc->coarse_ksp) { 4305 Vec crhs,csol; 4306 PetscBool ispreonly; 4307 if (CoarseNullSpace) { 4308 if (isbddc) { 4309 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4310 } else { 4311 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4312 } 4313 } 4314 /* setup coarse ksp */ 4315 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4316 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4317 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4318 /* hack */ 4319 if (!csol) { 4320 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4321 } 4322 if (!crhs) { 4323 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4324 } 4325 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4326 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4327 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4328 KSP check_ksp; 4329 KSPType check_ksp_type; 4330 PC check_pc; 4331 Vec check_vec,coarse_vec; 4332 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4333 PetscInt its; 4334 PetscBool compute_eigs; 4335 PetscReal *eigs_r,*eigs_c; 4336 PetscInt neigs; 4337 const char *prefix; 4338 4339 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4340 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4341 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4342 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4343 if (ispreonly) { 4344 check_ksp_type = KSPPREONLY; 4345 compute_eigs = PETSC_FALSE; 4346 } else { 4347 check_ksp_type = KSPGMRES; 4348 compute_eigs = PETSC_TRUE; 4349 } 4350 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4351 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4352 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4353 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4354 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4355 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4356 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4357 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4358 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4359 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4360 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4361 /* create random vec */ 4362 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4363 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4364 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4365 if (CoarseNullSpace) { 4366 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4367 } 4368 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4369 /* solve coarse problem */ 4370 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4371 if (CoarseNullSpace) { 4372 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4373 } 4374 /* set eigenvalue estimation if preonly has not been requested */ 4375 if (compute_eigs) { 4376 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4377 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4378 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4379 lambda_max = eigs_r[neigs-1]; 4380 lambda_min = eigs_r[0]; 4381 if (pcbddc->use_coarse_estimates) { 4382 if (lambda_max>lambda_min) { 4383 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4384 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4385 } 4386 } 4387 } 4388 4389 /* check coarse problem residual error */ 4390 if (pcbddc->dbg_flag) { 4391 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4392 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4393 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4394 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4395 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4396 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4397 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4398 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4399 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4400 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4403 if (compute_eigs) { 4404 PetscReal lambda_max_s,lambda_min_s; 4405 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4406 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4407 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4408 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4409 for (i=0;i<neigs;i++) { 4410 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4411 } 4412 } 4413 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4414 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4415 } 4416 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4417 if (compute_eigs) { 4418 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4419 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4420 } 4421 } 4422 } 4423 /* print additional info */ 4424 if (pcbddc->dbg_flag) { 4425 /* waits until all processes reaches this point */ 4426 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4429 } 4430 4431 /* free memory */ 4432 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4433 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4434 PetscFunctionReturn(0); 4435 } 4436 4437 #undef __FUNCT__ 4438 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4439 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4440 { 4441 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4442 PC_IS* pcis = (PC_IS*)pc->data; 4443 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4444 PetscInt i,coarse_size; 4445 PetscInt *local_primal_indices; 4446 PetscErrorCode ierr; 4447 4448 PetscFunctionBegin; 4449 /* Compute global number of coarse dofs */ 4450 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4451 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4452 } 4453 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4454 4455 /* check numbering */ 4456 if (pcbddc->dbg_flag) { 4457 PetscScalar coarsesum,*array; 4458 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4459 4460 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4462 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4463 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4464 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4465 for (i=0;i<pcbddc->local_primal_size;i++) { 4466 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4467 } 4468 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4469 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4470 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4471 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4472 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4473 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4474 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4475 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4476 for (i=0;i<pcis->n;i++) { 4477 if (array[i] == 1.0) { 4478 set_error = PETSC_TRUE; 4479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4480 } 4481 } 4482 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4484 for (i=0;i<pcis->n;i++) { 4485 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4486 } 4487 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4488 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4489 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4490 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4491 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4492 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4493 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4495 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4497 for (i=0;i<pcbddc->local_primal_size;i++) { 4498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4499 } 4500 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4501 } 4502 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4503 if (set_error_reduced) { 4504 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4505 } 4506 } 4507 /* get back data */ 4508 *coarse_size_n = coarse_size; 4509 *local_primal_indices_n = local_primal_indices; 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "PCBDDCGlobalToLocal" 4515 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4516 { 4517 IS localis_t; 4518 PetscInt i,lsize,*idxs,n; 4519 PetscScalar *vals; 4520 PetscErrorCode ierr; 4521 4522 PetscFunctionBegin; 4523 /* get indices in local ordering exploiting local to global map */ 4524 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4525 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4526 for (i=0;i<lsize;i++) vals[i] = 1.0; 4527 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4528 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4529 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4530 if (idxs) { /* multilevel guard */ 4531 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4532 } 4533 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4534 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4535 ierr = PetscFree(vals);CHKERRQ(ierr); 4536 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4537 /* now compute set in local ordering */ 4538 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4539 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4540 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4541 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4542 for (i=0,lsize=0;i<n;i++) { 4543 if (PetscRealPart(vals[i]) > 0.5) { 4544 lsize++; 4545 } 4546 } 4547 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4548 for (i=0,lsize=0;i<n;i++) { 4549 if (PetscRealPart(vals[i]) > 0.5) { 4550 idxs[lsize++] = i; 4551 } 4552 } 4553 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4554 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4555 *localis = localis_t; 4556 PetscFunctionReturn(0); 4557 } 4558 4559 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4560 #undef __FUNCT__ 4561 #define __FUNCT__ "PCBDDCMatMult_Private" 4562 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4563 { 4564 PCBDDCChange_ctx change_ctx; 4565 PetscErrorCode ierr; 4566 4567 PetscFunctionBegin; 4568 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4569 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4570 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4571 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4577 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4578 { 4579 PCBDDCChange_ctx change_ctx; 4580 PetscErrorCode ierr; 4581 4582 PetscFunctionBegin; 4583 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4584 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4585 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4586 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4592 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4593 { 4594 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4595 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4596 PetscInt *used_xadj,*used_adjncy; 4597 PetscBool free_used_adj; 4598 PetscErrorCode ierr; 4599 4600 PetscFunctionBegin; 4601 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4602 free_used_adj = PETSC_FALSE; 4603 if (pcbddc->sub_schurs_layers == -1) { 4604 used_xadj = NULL; 4605 used_adjncy = NULL; 4606 } else { 4607 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4608 used_xadj = pcbddc->mat_graph->xadj; 4609 used_adjncy = pcbddc->mat_graph->adjncy; 4610 } else if (pcbddc->computed_rowadj) { 4611 used_xadj = pcbddc->mat_graph->xadj; 4612 used_adjncy = pcbddc->mat_graph->adjncy; 4613 } else { 4614 Mat mat_adj; 4615 PetscBool flg_row=PETSC_TRUE; 4616 const PetscInt *xadj,*adjncy; 4617 PetscInt nvtxs; 4618 4619 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4620 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4621 if (!flg_row) { 4622 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4623 } 4624 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4625 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4626 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4627 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4628 if (!flg_row) { 4629 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4630 } 4631 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4632 free_used_adj = PETSC_TRUE; 4633 } 4634 } 4635 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4636 4637 /* free adjacency */ 4638 if (free_used_adj) { 4639 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4640 } 4641 PetscFunctionReturn(0); 4642 } 4643 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "PCBDDCInitSubSchurs" 4646 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4647 { 4648 PC_IS *pcis=(PC_IS*)pc->data; 4649 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4650 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4651 PCBDDCGraph graph; 4652 Mat S_j; 4653 PetscErrorCode ierr; 4654 4655 PetscFunctionBegin; 4656 /* attach interface graph for determining subsets */ 4657 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4658 IS verticesIS; 4659 4660 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4661 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4662 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4663 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4664 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4665 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4666 /* 4667 if (pcbddc->dbg_flag) { 4668 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4669 } 4670 */ 4671 } else { 4672 graph = pcbddc->mat_graph; 4673 } 4674 4675 /* Create Schur complement matrix */ 4676 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4677 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4678 4679 /* sub_schurs init */ 4680 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4681 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4682 /* free graph struct */ 4683 if (pcbddc->sub_schurs_rebuild) { 4684 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4685 } 4686 PetscFunctionReturn(0); 4687 } 4688