1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 112 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 519 /* submatrices of local problem */ 520 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 521 /* submatrices of local coarse problem */ 522 Mat S_VV,S_CV,S_VC,S_CC; 523 /* working matrices */ 524 Mat C_CR; 525 /* additional working stuff */ 526 PC pc_R; 527 Mat F; 528 PetscBool isLU,isCHOL,isILU; 529 530 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 531 PetscScalar *work; 532 PetscInt *idx_V_B; 533 PetscInt n,n_vertices,n_constraints; 534 PetscInt i,n_R,n_D,n_B; 535 PetscBool unsymmetric_check; 536 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 537 MatType impMatType; 538 /* some shortcuts to scalars */ 539 PetscScalar one=1.0,m_one=-1.0; 540 541 PetscFunctionBegin; 542 n_vertices = pcbddc->n_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; 546 n_D = pcis->n - n_B; 547 n_R = pcis->n - n_vertices; 548 549 /* Set types for local objects needed by BDDC precondtioner */ 550 impMatType = MATSEQDENSE; 551 552 /* vertices in boundary numbering */ 553 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 554 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 555 if (i != n_vertices) { 556 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 557 } 558 559 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 560 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 570 unsymmetric_check = PETSC_FALSE; 571 /* allocate workspace */ 572 n = 0; 573 if (n_constraints) { 574 n += n_R*n_constraints; 575 } 576 if (n_vertices) { 577 n = PetscMax(2*n_R*n_vertices,n); 578 n = PetscMax((n_R+n_B)*n_vertices,n); 579 } 580 if (!pcbddc->issym) { 581 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 582 unsymmetric_check = PETSC_TRUE; 583 } 584 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 585 586 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 587 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 591 if (isLU || isILU || isCHOL) { 592 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 593 } else if (sub_schurs->reuse_mumps) { 594 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 595 MatFactorType type; 596 597 F = reuse_mumps->F; 598 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 599 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 600 } else { 601 F = NULL; 602 } 603 604 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 605 if (n_constraints) { 606 Mat M1,M2,M3; 607 Mat auxmat; 608 IS is_aux; 609 PetscScalar *array,*array2; 610 611 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 614 /* Extract constraints on R nodes: C_{CR} */ 615 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 616 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 617 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 618 619 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 620 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 621 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 622 for (i=0;i<n_constraints;i++) { 623 const PetscScalar *row_cmat_values; 624 const PetscInt *row_cmat_indices; 625 PetscInt size_of_constraint,j; 626 627 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 628 for (j=0;j<size_of_constraint;j++) { 629 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 630 } 631 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 632 } 633 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 634 if (F) { 635 Mat B; 636 637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 638 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 639 ierr = MatDestroy(&B);CHKERRQ(ierr); 640 } else { 641 PetscScalar *marr; 642 643 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 652 } 653 if (!pcbddc->switch_static) { 654 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 655 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 656 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 657 for (i=0;i<n_constraints;i++) { 658 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 659 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 660 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 661 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 662 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 663 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 664 } 665 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 666 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 667 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 668 } else { 669 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 670 pcbddc->local_auxmat2 = local_auxmat2_R; 671 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 672 } 673 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 674 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 675 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 676 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 677 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 678 if (isCHOL) { 679 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 680 } else { 681 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 682 } 683 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 684 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 685 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 686 ierr = MatDestroy(&M2);CHKERRQ(ierr); 687 ierr = MatDestroy(&M3);CHKERRQ(ierr); 688 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 689 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 690 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 691 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 692 ierr = MatDestroy(&M1);CHKERRQ(ierr); 693 } 694 /* Get submatrices from subdomain matrix */ 695 if (n_vertices) { 696 Mat newmat; 697 IS is_aux; 698 PetscInt ibs,mbs; 699 PetscBool issbaij; 700 701 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 702 IS tis; 703 704 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 705 ierr = ISSort(tis);CHKERRQ(ierr); 706 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 707 ierr = ISDestroy(&tis);CHKERRQ(ierr); 708 } else { 709 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 710 } 711 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 712 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 713 if (ibs != mbs) { /* need to convert to SEQAIJ */ 714 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 715 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 718 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 719 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 720 } else { 721 /* this is safe */ 722 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 723 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 724 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 725 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 726 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 727 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 728 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 729 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 730 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 731 } else { 732 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 733 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 734 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 735 } 736 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 737 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 738 } 739 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 740 } 741 742 /* Matrix of coarse basis functions (local) */ 743 if (pcbddc->coarse_phi_B) { 744 PetscInt on_B,on_primal,on_D=n_D; 745 if (pcbddc->coarse_phi_D) { 746 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 747 } 748 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 749 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 750 PetscScalar *marray; 751 752 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 753 ierr = PetscFree(marray);CHKERRQ(ierr); 754 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 756 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 757 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 758 } 759 } 760 761 if (!pcbddc->coarse_phi_B) { 762 PetscScalar *marray; 763 764 n = n_B*pcbddc->local_primal_size; 765 if (pcbddc->switch_static || pcbddc->dbg_flag) { 766 n += n_D*pcbddc->local_primal_size; 767 } 768 if (!pcbddc->issym) { 769 n *= 2; 770 } 771 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 773 n = n_B*pcbddc->local_primal_size; 774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 775 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 776 n += n_D*pcbddc->local_primal_size; 777 } 778 if (!pcbddc->issym) { 779 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 780 if (pcbddc->switch_static || pcbddc->dbg_flag) { 781 n = n_B*pcbddc->local_primal_size; 782 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 783 } 784 } else { 785 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 786 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 788 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 789 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 790 } 791 } 792 } 793 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 794 /* vertices */ 795 if (n_vertices) { 796 797 if (n_R) { 798 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 799 PetscBLASInt B_N,B_one = 1; 800 PetscScalar *x,*y; 801 802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 803 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 804 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 805 if (F) { /* TODO could be optimized for symmetric problems */ 806 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 807 } else { 808 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 809 for (i=0;i<n_vertices;i++) { 810 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 811 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 812 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 813 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 814 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 815 } 816 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 817 } 818 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 819 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 820 if (n_constraints) { 821 Mat B; 822 823 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 824 for (i=0;i<n_vertices;i++) { 825 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 826 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 827 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 830 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 831 } 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 833 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 836 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 837 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 838 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 839 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 840 ierr = MatDestroy(&B);CHKERRQ(ierr); 841 } 842 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 843 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 844 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 845 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 846 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 847 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 848 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 849 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 850 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 851 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 852 } else { 853 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 854 } 855 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 856 /* coarse basis functions */ 857 for (i=0;i<n_vertices;i++) { 858 PetscScalar *y; 859 860 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 861 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 862 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 863 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 y[n_B*i+idx_V_B[i]] = 1.0; 866 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 868 869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 870 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 871 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 872 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 875 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 876 } 877 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 878 } 879 } 880 881 if (n_constraints) { 882 Mat B; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 885 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 886 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 887 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 888 if (n_vertices) { 889 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 890 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 891 } else { 892 Mat S_VCt; 893 894 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 895 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 896 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 897 } 898 } 899 ierr = MatDestroy(&B);CHKERRQ(ierr); 900 /* coarse basis functions */ 901 for (i=0;i<n_constraints;i++) { 902 PetscScalar *y; 903 904 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 905 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 906 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 907 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 909 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 910 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 911 if (pcbddc->switch_static || pcbddc->dbg_flag) { 912 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 914 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 916 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 917 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 918 } 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 } 921 } 922 if (n_constraints) { 923 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 924 } 925 926 /* compute other basis functions for non-symmetric problems */ 927 if (!pcbddc->issym) { 928 929 if (n_constraints) { 930 Mat S_CCT,B_C; 931 932 /* this is a lazy thing */ 933 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 935 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 936 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 937 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 938 if (n_vertices) { 939 Mat B_V,S_VCT; 940 941 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 942 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 943 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 944 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 945 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 946 } 947 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 948 } else { /* if there are no constraints, reset work */ 949 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 950 } 951 if (n_vertices && n_R) { 952 Mat A_VRT; 953 PetscScalar *marray; 954 PetscBLASInt B_N,B_one = 1; 955 956 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 957 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 958 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 959 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 960 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 961 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 962 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 963 } 964 965 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 968 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 969 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 970 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 971 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 972 } 973 } else { 974 for (i=0;i<pcbddc->local_primal_size;i++) { 975 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 976 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 977 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 978 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 979 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 980 } 981 } 982 /* coarse basis functions */ 983 for (i=0;i<pcbddc->local_primal_size;i++) { 984 PetscScalar *y; 985 986 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 987 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 988 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 989 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 if (i<n_vertices) { 992 y[n_B*i+idx_V_B[i]] = 1.0; 993 } 994 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 995 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 996 997 if (pcbddc->switch_static || pcbddc->dbg_flag) { 998 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 999 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1000 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1002 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1003 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1004 } 1005 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1006 } 1007 } 1008 /* free memory */ 1009 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1010 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1011 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1012 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1013 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1014 ierr = PetscFree(work);CHKERRQ(ierr); 1015 if (n_vertices) { 1016 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1017 } 1018 if (n_constraints) { 1019 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1020 } 1021 /* Checking coarse_sub_mat and coarse basis functios */ 1022 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1023 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1024 if (pcbddc->dbg_flag) { 1025 Mat coarse_sub_mat; 1026 Mat AUXMAT,TM1,TM2,TM3,TM4; 1027 Mat coarse_phi_D,coarse_phi_B; 1028 Mat coarse_psi_D,coarse_psi_B; 1029 Mat A_II,A_BB,A_IB,A_BI; 1030 Mat C_B,CPHI; 1031 IS is_dummy; 1032 Vec mones; 1033 MatType checkmattype=MATSEQAIJ; 1034 PetscReal real_value; 1035 1036 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1037 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1038 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1039 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1040 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1041 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1042 if (unsymmetric_check) { 1043 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1044 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1045 } 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1047 1048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1051 if (unsymmetric_check) { 1052 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1053 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1054 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1055 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1056 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1057 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1058 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1059 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1060 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1061 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1062 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1063 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1064 } else { 1065 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1066 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1067 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1068 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1069 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1070 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1071 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1072 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1073 } 1074 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1075 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1076 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1077 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1078 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1079 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1080 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1081 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1082 1083 /* check constraints */ 1084 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1085 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1086 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1087 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1088 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1089 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1090 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1092 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1093 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1094 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1095 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1096 1097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1098 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1099 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1100 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1101 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1102 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1103 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1104 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1105 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1106 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1107 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1108 if (unsymmetric_check) { 1109 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1110 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1111 } 1112 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1113 } 1114 /* get back data */ 1115 *coarse_submat_vals_n = coarse_submat_vals; 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1121 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1122 { 1123 Mat *work_mat; 1124 IS isrow_s,iscol_s; 1125 PetscBool rsorted,csorted; 1126 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1127 PetscErrorCode ierr; 1128 1129 PetscFunctionBegin; 1130 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1131 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1132 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1133 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1134 1135 if (!rsorted) { 1136 const PetscInt *idxs; 1137 PetscInt *idxs_sorted,i; 1138 1139 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1141 for (i=0;i<rsize;i++) { 1142 idxs_perm_r[i] = i; 1143 } 1144 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1145 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1146 for (i=0;i<rsize;i++) { 1147 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1148 } 1149 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1150 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1151 } else { 1152 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1153 isrow_s = isrow; 1154 } 1155 1156 if (!csorted) { 1157 if (isrow == iscol) { 1158 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1159 iscol_s = isrow_s; 1160 } else { 1161 const PetscInt *idxs; 1162 PetscInt *idxs_sorted,i; 1163 1164 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1165 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1166 for (i=0;i<csize;i++) { 1167 idxs_perm_c[i] = i; 1168 } 1169 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1170 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1171 for (i=0;i<csize;i++) { 1172 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1173 } 1174 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1175 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1176 } 1177 } else { 1178 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1179 iscol_s = iscol; 1180 } 1181 1182 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1183 1184 if (!rsorted || !csorted) { 1185 Mat new_mat; 1186 IS is_perm_r,is_perm_c; 1187 1188 if (!rsorted) { 1189 PetscInt *idxs_r,i; 1190 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1191 for (i=0;i<rsize;i++) { 1192 idxs_r[idxs_perm_r[i]] = i; 1193 } 1194 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1200 1201 if (!csorted) { 1202 if (isrow_s == iscol_s) { 1203 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1204 is_perm_c = is_perm_r; 1205 } else { 1206 PetscInt *idxs_c,i; 1207 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1208 for (i=0;i<csize;i++) { 1209 idxs_c[idxs_perm_c[i]] = i; 1210 } 1211 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1212 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1213 } 1214 } else { 1215 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1216 } 1217 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1218 1219 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1221 work_mat[0] = new_mat; 1222 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1223 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1224 } 1225 1226 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1227 *B = work_mat[0]; 1228 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1229 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1230 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1236 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1237 { 1238 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 Mat new_mat; 1241 IS is_local,is_global; 1242 PetscInt local_size; 1243 PetscBool isseqaij; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1248 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1249 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1250 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1251 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1252 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1253 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1254 1255 /* check */ 1256 if (pcbddc->dbg_flag) { 1257 Vec x,x_change; 1258 PetscReal error; 1259 1260 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1261 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1262 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1263 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1266 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1268 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1269 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1270 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1271 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1272 ierr = VecDestroy(&x);CHKERRQ(ierr); 1273 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1274 } 1275 1276 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1277 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1278 if (isseqaij) { 1279 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1280 } else { 1281 Mat work_mat; 1282 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1283 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1284 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1285 } 1286 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1287 #if !defined(PETSC_USE_COMPLEX) 1288 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1289 #endif 1290 /* 1291 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1292 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1293 */ 1294 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1300 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1301 { 1302 PC_IS* pcis = (PC_IS*)(pc->data); 1303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1304 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1305 PetscInt *idx_R_local=NULL; 1306 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1307 PetscInt vbs,bs; 1308 PetscBT bitmask=NULL; 1309 PetscErrorCode ierr; 1310 1311 PetscFunctionBegin; 1312 /* 1313 No need to setup local scatters if 1314 - primal space is unchanged 1315 AND 1316 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1317 AND 1318 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1319 */ 1320 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1321 PetscFunctionReturn(0); 1322 } 1323 /* destroy old objects */ 1324 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1325 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1326 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1327 /* Set Non-overlapping dimensions */ 1328 n_B = pcis->n_B; 1329 n_D = pcis->n - n_B; 1330 n_vertices = pcbddc->n_vertices; 1331 1332 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1333 1334 /* create auxiliary bitmask and allocate workspace */ 1335 if (!sub_schurs->reuse_mumps) { 1336 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1337 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1338 for (i=0;i<n_vertices;i++) { 1339 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1340 } 1341 1342 for (i=0, n_R=0; i<pcis->n; i++) { 1343 if (!PetscBTLookup(bitmask,i)) { 1344 idx_R_local[n_R++] = i; 1345 } 1346 } 1347 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1348 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1349 1350 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1351 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1352 } 1353 1354 /* Block code */ 1355 vbs = 1; 1356 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1357 if (bs>1 && !(n_vertices%bs)) { 1358 PetscBool is_blocked = PETSC_TRUE; 1359 PetscInt *vary; 1360 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1361 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1362 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1363 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1364 for (i=0; i<n_vertices/bs; i++) { 1365 if (vary[i]!=0 && vary[i]!=bs) { 1366 is_blocked = PETSC_FALSE; 1367 break; 1368 } 1369 } 1370 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1371 vbs = bs; 1372 for (i=0;i<n_R/vbs;i++) { 1373 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1374 } 1375 } 1376 ierr = PetscFree(vary);CHKERRQ(ierr); 1377 } 1378 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1379 if (sub_schurs->reuse_mumps) { 1380 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1381 1382 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1383 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1384 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1385 reuse_mumps->is_R = pcbddc->is_R_local; 1386 } else { 1387 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1388 } 1389 1390 /* print some info if requested */ 1391 if (pcbddc->dbg_flag) { 1392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1394 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1399 } 1400 1401 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1402 if (!sub_schurs->reuse_mumps) { 1403 IS is_aux1,is_aux2; 1404 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1405 1406 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1407 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1408 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1409 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1410 for (i=0; i<n_D; i++) { 1411 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1412 } 1413 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1414 for (i=0, j=0; i<n_R; i++) { 1415 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1416 aux_array1[j++] = i; 1417 } 1418 } 1419 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1420 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 for (i=0, j=0; i<n_B; i++) { 1422 if (!PetscBTLookup(bitmask,is_indices[i])) { 1423 aux_array2[j++] = i; 1424 } 1425 } 1426 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1427 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1428 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1429 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1430 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1431 1432 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1433 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1434 for (i=0, j=0; i<n_R; i++) { 1435 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1436 aux_array1[j++] = i; 1437 } 1438 } 1439 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1440 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1441 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1442 } 1443 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1444 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1445 } else { 1446 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1447 IS tis; 1448 PetscInt schur_size; 1449 1450 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1451 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1452 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1453 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1454 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1455 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1456 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1457 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1458 } 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1466 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1467 { 1468 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1469 PC_IS *pcis = (PC_IS*)pc->data; 1470 PC pc_temp; 1471 Mat A_RR; 1472 MatReuse reuse; 1473 PetscScalar m_one = -1.0; 1474 PetscReal value; 1475 PetscInt n_D,n_R,ibs,mbs; 1476 PetscBool use_exact,use_exact_reduced,issbaij; 1477 PetscErrorCode ierr; 1478 /* prefixes stuff */ 1479 char dir_prefix[256],neu_prefix[256],str_level[16]; 1480 size_t len; 1481 1482 PetscFunctionBegin; 1483 1484 /* compute prefixes */ 1485 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1486 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1487 if (!pcbddc->current_level) { 1488 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1489 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1490 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1491 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1492 } else { 1493 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1494 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1495 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1496 len -= 15; /* remove "pc_bddc_coarse_" */ 1497 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1498 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1499 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1500 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1501 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1502 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1503 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1504 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1505 } 1506 1507 /* DIRICHLET PROBLEM */ 1508 if (dirichlet) { 1509 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1510 if (pcbddc->issym) { 1511 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1512 } 1513 /* Matrix for Dirichlet problem is pcis->A_II */ 1514 n_D = pcis->n - pcis->n_B; 1515 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1516 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1517 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1518 /* default */ 1519 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1520 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1522 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1523 if (issbaij) { 1524 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1525 } else { 1526 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1527 } 1528 /* Allow user's customization */ 1529 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1530 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1531 } 1532 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1533 if (sub_schurs->reuse_mumps) { 1534 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1535 1536 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1537 } 1538 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1539 if (!n_D) { 1540 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1541 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1542 } 1543 /* Set Up KSP for Dirichlet problem of BDDC */ 1544 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1545 /* set ksp_D into pcis data */ 1546 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1547 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1548 pcis->ksp_D = pcbddc->ksp_D; 1549 } 1550 1551 /* NEUMANN PROBLEM */ 1552 A_RR = 0; 1553 if (neumann) { 1554 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1555 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1556 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1557 if (pcbddc->ksp_R) { /* already created ksp */ 1558 PetscInt nn_R; 1559 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1560 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1561 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1562 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1563 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1564 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1565 reuse = MAT_INITIAL_MATRIX; 1566 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1567 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1568 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1569 reuse = MAT_INITIAL_MATRIX; 1570 } else { /* safe to reuse the matrix */ 1571 reuse = MAT_REUSE_MATRIX; 1572 } 1573 } 1574 /* last check */ 1575 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1576 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1577 reuse = MAT_INITIAL_MATRIX; 1578 } 1579 } else { /* first time, so we need to create the matrix */ 1580 reuse = MAT_INITIAL_MATRIX; 1581 } 1582 /* extract A_RR */ 1583 if (!sub_schurs->reuse_mumps) { 1584 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1585 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1586 if (ibs != mbs) { 1587 Mat newmat; 1588 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1589 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1590 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1591 } else { 1592 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1593 } 1594 if (pcbddc->issym) { 1595 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1596 } 1597 } else { 1598 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1599 1600 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1601 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1602 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1603 } 1604 if (!pcbddc->ksp_R) { /* create object if not present */ 1605 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1606 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1607 /* default */ 1608 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1609 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1610 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1611 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1612 if (issbaij) { 1613 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1614 } else { 1615 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1616 } 1617 /* Allow user's customization */ 1618 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1619 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1620 } 1621 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1622 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1623 if (!n_R) { 1624 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1625 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1626 } 1627 /* Reuse MUMPS solver if it is present */ 1628 if (sub_schurs->reuse_mumps) { 1629 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1630 1631 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1632 } 1633 /* Set Up KSP for Neumann problem of BDDC */ 1634 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1635 } 1636 /* free Neumann problem's matrix */ 1637 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1638 1639 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1640 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1641 if (pcbddc->dbg_flag) { 1642 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1643 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1644 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1645 } 1646 if (dirichlet) { /* Dirichlet */ 1647 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1648 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1649 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1650 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1651 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1652 /* need to be adapted? */ 1653 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1654 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1655 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1656 /* print info */ 1657 if (pcbddc->dbg_flag) { 1658 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 } 1661 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1662 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1663 } 1664 } 1665 if (neumann) { /* Neumann */ 1666 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1667 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1668 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1669 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1670 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1671 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1672 /* need to be adapted? */ 1673 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1674 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1675 /* print info */ 1676 if (pcbddc->dbg_flag) { 1677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1678 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1679 } 1680 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1681 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1682 } 1683 } 1684 } 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1690 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1691 { 1692 PetscErrorCode ierr; 1693 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1694 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1695 1696 PetscFunctionBegin; 1697 if (!sub_schurs->reuse_mumps) { 1698 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1699 } 1700 if (!pcbddc->switch_static) { 1701 if (applytranspose && pcbddc->local_auxmat1) { 1702 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1703 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1704 } 1705 if (!sub_schurs->reuse_mumps) { 1706 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1707 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1708 } else { 1709 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1710 1711 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1712 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1713 } 1714 } else { 1715 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1716 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1717 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1718 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1719 if (applytranspose && pcbddc->local_auxmat1) { 1720 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1721 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1722 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1723 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1724 } 1725 } 1726 if (!sub_schurs->reuse_mumps) { 1727 if (applytranspose) { 1728 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1729 } else { 1730 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1731 } 1732 #if defined(PETSC_HAVE_MUMPS) 1733 } else { 1734 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1735 1736 if (applytranspose) { 1737 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1738 } else { 1739 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 1740 } 1741 #endif 1742 } 1743 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1744 if (!pcbddc->switch_static) { 1745 if (!sub_schurs->reuse_mumps) { 1746 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1747 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1748 } else { 1749 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1750 1751 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1752 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1753 } 1754 if (!applytranspose && pcbddc->local_auxmat1) { 1755 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1756 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1757 } 1758 } else { 1759 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1760 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1761 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1762 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1763 if (!applytranspose && pcbddc->local_auxmat1) { 1764 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1765 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1766 } 1767 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1768 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1769 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1770 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1771 } 1772 PetscFunctionReturn(0); 1773 } 1774 1775 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1778 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1779 { 1780 PetscErrorCode ierr; 1781 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1782 PC_IS* pcis = (PC_IS*) (pc->data); 1783 const PetscScalar zero = 0.0; 1784 1785 PetscFunctionBegin; 1786 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1787 if (applytranspose) { 1788 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1789 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1790 } else { 1791 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1792 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1793 } 1794 /* start communications from local primal nodes to rhs of coarse solver */ 1795 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1796 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1797 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1798 1799 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1800 /* TODO remove null space when doing multilevel */ 1801 if (pcbddc->coarse_ksp) { 1802 Vec rhs,sol; 1803 1804 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1805 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1806 if (applytranspose) { 1807 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1808 } else { 1809 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1810 } 1811 } 1812 1813 /* Local solution on R nodes */ 1814 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1815 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1816 } 1817 1818 /* communications from coarse sol to local primal nodes */ 1819 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1820 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1821 1822 /* Sum contributions from two levels */ 1823 if (applytranspose) { 1824 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1825 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1826 } else { 1827 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1828 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1835 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1836 { 1837 PetscErrorCode ierr; 1838 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1839 PetscScalar *array; 1840 Vec from,to; 1841 1842 PetscFunctionBegin; 1843 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1844 from = pcbddc->coarse_vec; 1845 to = pcbddc->vec1_P; 1846 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1847 Vec tvec; 1848 1849 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1850 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1851 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1852 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1853 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1854 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1855 } 1856 } else { /* from local to global -> put data in coarse right hand side */ 1857 from = pcbddc->vec1_P; 1858 to = pcbddc->coarse_vec; 1859 } 1860 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1866 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1867 { 1868 PetscErrorCode ierr; 1869 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1870 PetscScalar *array; 1871 Vec from,to; 1872 1873 PetscFunctionBegin; 1874 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1875 from = pcbddc->coarse_vec; 1876 to = pcbddc->vec1_P; 1877 } else { /* from local to global -> put data in coarse right hand side */ 1878 from = pcbddc->vec1_P; 1879 to = pcbddc->coarse_vec; 1880 } 1881 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1882 if (smode == SCATTER_FORWARD) { 1883 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1884 Vec tvec; 1885 1886 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1887 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1888 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1889 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1890 } 1891 } else { 1892 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1893 ierr = VecResetArray(from);CHKERRQ(ierr); 1894 } 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 /* uncomment for testing purposes */ 1900 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1903 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1904 { 1905 PetscErrorCode ierr; 1906 PC_IS* pcis = (PC_IS*)(pc->data); 1907 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1908 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1909 /* one and zero */ 1910 PetscScalar one=1.0,zero=0.0; 1911 /* space to store constraints and their local indices */ 1912 PetscScalar *constraints_data; 1913 PetscInt *constraints_idxs,*constraints_idxs_B; 1914 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1915 PetscInt *constraints_n; 1916 /* iterators */ 1917 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1918 /* BLAS integers */ 1919 PetscBLASInt lwork,lierr; 1920 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1921 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1922 /* reuse */ 1923 PetscInt olocal_primal_size,olocal_primal_size_cc; 1924 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1925 /* change of basis */ 1926 PetscBool qr_needed; 1927 PetscBT change_basis,qr_needed_idx; 1928 /* auxiliary stuff */ 1929 PetscInt *nnz,*is_indices; 1930 PetscInt ncc; 1931 /* some quantities */ 1932 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1933 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1934 1935 PetscFunctionBegin; 1936 /* Destroy Mat objects computed previously */ 1937 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1938 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1939 /* save info on constraints from previous setup (if any) */ 1940 olocal_primal_size = pcbddc->local_primal_size; 1941 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1942 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1943 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1944 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1945 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1946 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1947 1948 /* print some info */ 1949 if (pcbddc->dbg_flag) { 1950 IS vertices; 1951 PetscInt nv,nedges,nfaces; 1952 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1953 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1954 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1955 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1959 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1961 } 1962 1963 if (!pcbddc->adaptive_selection) { 1964 IS ISForVertices,*ISForFaces,*ISForEdges; 1965 MatNullSpace nearnullsp; 1966 const Vec *nearnullvecs; 1967 Vec *localnearnullsp; 1968 PetscScalar *array; 1969 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1970 PetscBool nnsp_has_cnst; 1971 /* LAPACK working arrays for SVD or POD */ 1972 PetscBool skip_lapack,boolforchange; 1973 PetscScalar *work; 1974 PetscReal *singular_vals; 1975 #if defined(PETSC_USE_COMPLEX) 1976 PetscReal *rwork; 1977 #endif 1978 #if defined(PETSC_MISSING_LAPACK_GESVD) 1979 PetscScalar *temp_basis,*correlation_mat; 1980 #else 1981 PetscBLASInt dummy_int=1; 1982 PetscScalar dummy_scalar=1.; 1983 #endif 1984 1985 /* Get index sets for faces, edges and vertices from graph */ 1986 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1987 /* free unneeded index sets */ 1988 if (!pcbddc->use_vertices) { 1989 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1990 } 1991 if (!pcbddc->use_edges) { 1992 for (i=0;i<n_ISForEdges;i++) { 1993 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1994 } 1995 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1996 n_ISForEdges = 0; 1997 } 1998 if (!pcbddc->use_faces) { 1999 for (i=0;i<n_ISForFaces;i++) { 2000 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2001 } 2002 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2003 n_ISForFaces = 0; 2004 } 2005 2006 #if defined(PETSC_USE_DEBUG) 2007 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2008 Also use_change_of_basis should be consistent among processors */ 2009 if (pcbddc->NullSpace) { 2010 PetscBool tbool[2],gbool[2]; 2011 2012 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2013 pcbddc->use_change_of_basis = PETSC_TRUE; 2014 if (!ISForEdges) { 2015 pcbddc->use_change_on_faces = PETSC_TRUE; 2016 } 2017 } 2018 tbool[0] = pcbddc->use_change_of_basis; 2019 tbool[1] = pcbddc->use_change_on_faces; 2020 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2021 pcbddc->use_change_of_basis = gbool[0]; 2022 pcbddc->use_change_on_faces = gbool[1]; 2023 } 2024 #endif 2025 2026 /* check if near null space is attached to global mat */ 2027 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2028 if (nearnullsp) { 2029 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2030 /* remove any stored info */ 2031 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2032 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2033 /* store information for BDDC solver reuse */ 2034 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2035 pcbddc->onearnullspace = nearnullsp; 2036 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2037 for (i=0;i<nnsp_size;i++) { 2038 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2039 } 2040 } else { /* if near null space is not provided BDDC uses constants by default */ 2041 nnsp_size = 0; 2042 nnsp_has_cnst = PETSC_TRUE; 2043 } 2044 /* get max number of constraints on a single cc */ 2045 max_constraints = nnsp_size; 2046 if (nnsp_has_cnst) max_constraints++; 2047 2048 /* 2049 Evaluate maximum storage size needed by the procedure 2050 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2051 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2052 There can be multiple constraints per connected component 2053 */ 2054 n_vertices = 0; 2055 if (ISForVertices) { 2056 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2057 } 2058 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2059 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2060 2061 total_counts = n_ISForFaces+n_ISForEdges; 2062 total_counts *= max_constraints; 2063 total_counts += n_vertices; 2064 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2065 2066 total_counts = 0; 2067 max_size_of_constraint = 0; 2068 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2069 IS used_is; 2070 if (i<n_ISForEdges) { 2071 used_is = ISForEdges[i]; 2072 } else { 2073 used_is = ISForFaces[i-n_ISForEdges]; 2074 } 2075 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2076 total_counts += j; 2077 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2078 } 2079 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2080 2081 /* get local part of global near null space vectors */ 2082 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2083 for (k=0;k<nnsp_size;k++) { 2084 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2085 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2086 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2087 } 2088 2089 /* whether or not to skip lapack calls */ 2090 skip_lapack = PETSC_TRUE; 2091 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2092 2093 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2094 if (!skip_lapack) { 2095 PetscScalar temp_work; 2096 2097 #if defined(PETSC_MISSING_LAPACK_GESVD) 2098 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2099 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2100 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2101 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2102 #if defined(PETSC_USE_COMPLEX) 2103 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2104 #endif 2105 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2106 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2107 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2108 lwork = -1; 2109 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2110 #if !defined(PETSC_USE_COMPLEX) 2111 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2112 #else 2113 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2114 #endif 2115 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2116 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2117 #else /* on missing GESVD */ 2118 /* SVD */ 2119 PetscInt max_n,min_n; 2120 max_n = max_size_of_constraint; 2121 min_n = max_constraints; 2122 if (max_size_of_constraint < max_constraints) { 2123 min_n = max_size_of_constraint; 2124 max_n = max_constraints; 2125 } 2126 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2127 #if defined(PETSC_USE_COMPLEX) 2128 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2129 #endif 2130 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2131 lwork = -1; 2132 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2133 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2135 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2136 #if !defined(PETSC_USE_COMPLEX) 2137 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2138 #else 2139 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2140 #endif 2141 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2142 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2143 #endif /* on missing GESVD */ 2144 /* Allocate optimal workspace */ 2145 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2146 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2147 } 2148 /* Now we can loop on constraining sets */ 2149 total_counts = 0; 2150 constraints_idxs_ptr[0] = 0; 2151 constraints_data_ptr[0] = 0; 2152 /* vertices */ 2153 if (n_vertices) { 2154 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2155 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2156 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2157 for (i=0;i<n_vertices;i++) { 2158 constraints_n[total_counts] = 1; 2159 constraints_data[total_counts] = 1.0; 2160 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2161 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2162 total_counts++; 2163 } 2164 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2165 PetscBool used_vertex; 2166 for (i=0;i<n_vertices;i++) { 2167 used_vertex = PETSC_FALSE; 2168 k = 0; 2169 while (!used_vertex && k<nnsp_size) { 2170 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2171 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2172 constraints_n[total_counts] = 1; 2173 constraints_idxs[total_counts] = is_indices[i]; 2174 constraints_data[total_counts] = 1.0; 2175 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2176 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2177 total_counts++; 2178 used_vertex = PETSC_TRUE; 2179 } 2180 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2181 k++; 2182 } 2183 } 2184 } 2185 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2186 n_vertices = total_counts; 2187 } 2188 2189 /* edges and faces */ 2190 total_counts_cc = total_counts; 2191 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2192 IS used_is; 2193 PetscBool idxs_copied = PETSC_FALSE; 2194 2195 if (ncc<n_ISForEdges) { 2196 used_is = ISForEdges[ncc]; 2197 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2198 } else { 2199 used_is = ISForFaces[ncc-n_ISForEdges]; 2200 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2201 } 2202 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2203 2204 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2205 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2206 /* change of basis should not be performed on local periodic nodes */ 2207 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2208 if (nnsp_has_cnst) { 2209 PetscScalar quad_value; 2210 2211 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2212 idxs_copied = PETSC_TRUE; 2213 2214 if (!pcbddc->use_nnsp_true) { 2215 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2216 } else { 2217 quad_value = 1.0; 2218 } 2219 for (j=0;j<size_of_constraint;j++) { 2220 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2221 } 2222 temp_constraints++; 2223 total_counts++; 2224 } 2225 for (k=0;k<nnsp_size;k++) { 2226 PetscReal real_value; 2227 PetscScalar *ptr_to_data; 2228 2229 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2230 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2231 for (j=0;j<size_of_constraint;j++) { 2232 ptr_to_data[j] = array[is_indices[j]]; 2233 } 2234 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2235 /* check if array is null on the connected component */ 2236 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2237 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2238 if (real_value > 0.0) { /* keep indices and values */ 2239 temp_constraints++; 2240 total_counts++; 2241 if (!idxs_copied) { 2242 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2243 idxs_copied = PETSC_TRUE; 2244 } 2245 } 2246 } 2247 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2248 valid_constraints = temp_constraints; 2249 if (!pcbddc->use_nnsp_true && temp_constraints) { 2250 if (temp_constraints == 1) { /* just normalize the constraint */ 2251 PetscScalar norm,*ptr_to_data; 2252 2253 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2254 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2255 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2256 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2257 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2258 } else { /* perform SVD */ 2259 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2260 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2261 2262 #if defined(PETSC_MISSING_LAPACK_GESVD) 2263 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2264 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2265 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2266 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2267 from that computed using LAPACKgesvd 2268 -> This is due to a different computation of eigenvectors in LAPACKheev 2269 -> The quality of the POD-computed basis will be the same */ 2270 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2271 /* Store upper triangular part of correlation matrix */ 2272 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2274 for (j=0;j<temp_constraints;j++) { 2275 for (k=0;k<j+1;k++) { 2276 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2277 } 2278 } 2279 /* compute eigenvalues and eigenvectors of correlation matrix */ 2280 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2281 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2282 #if !defined(PETSC_USE_COMPLEX) 2283 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2284 #else 2285 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2286 #endif 2287 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2288 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2289 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2290 j = 0; 2291 while (j < temp_constraints && singular_vals[j] < tol) j++; 2292 total_counts = total_counts-j; 2293 valid_constraints = temp_constraints-j; 2294 /* scale and copy POD basis into used quadrature memory */ 2295 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2296 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2297 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2298 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2299 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2300 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2301 if (j<temp_constraints) { 2302 PetscInt ii; 2303 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2304 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2305 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2306 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2307 for (k=0;k<temp_constraints-j;k++) { 2308 for (ii=0;ii<size_of_constraint;ii++) { 2309 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2310 } 2311 } 2312 } 2313 #else /* on missing GESVD */ 2314 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2315 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2317 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2318 #if !defined(PETSC_USE_COMPLEX) 2319 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2320 #else 2321 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2322 #endif 2323 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2324 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2325 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2326 k = temp_constraints; 2327 if (k > size_of_constraint) k = size_of_constraint; 2328 j = 0; 2329 while (j < k && singular_vals[k-j-1] < tol) j++; 2330 valid_constraints = k-j; 2331 total_counts = total_counts-temp_constraints+valid_constraints; 2332 #endif /* on missing GESVD */ 2333 } 2334 } 2335 /* update pointers information */ 2336 if (valid_constraints) { 2337 constraints_n[total_counts_cc] = valid_constraints; 2338 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2339 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2340 /* set change_of_basis flag */ 2341 if (boolforchange) { 2342 PetscBTSet(change_basis,total_counts_cc); 2343 } 2344 total_counts_cc++; 2345 } 2346 } 2347 /* free workspace */ 2348 if (!skip_lapack) { 2349 ierr = PetscFree(work);CHKERRQ(ierr); 2350 #if defined(PETSC_USE_COMPLEX) 2351 ierr = PetscFree(rwork);CHKERRQ(ierr); 2352 #endif 2353 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2354 #if defined(PETSC_MISSING_LAPACK_GESVD) 2355 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2356 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2357 #endif 2358 } 2359 for (k=0;k<nnsp_size;k++) { 2360 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2361 } 2362 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2363 /* free index sets of faces, edges and vertices */ 2364 for (i=0;i<n_ISForFaces;i++) { 2365 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2366 } 2367 if (n_ISForFaces) { 2368 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2369 } 2370 for (i=0;i<n_ISForEdges;i++) { 2371 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2372 } 2373 if (n_ISForEdges) { 2374 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2375 } 2376 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2377 } else { 2378 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2379 2380 total_counts = 0; 2381 n_vertices = 0; 2382 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2383 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2384 } 2385 max_constraints = 0; 2386 total_counts_cc = 0; 2387 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2388 total_counts += pcbddc->adaptive_constraints_n[i]; 2389 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2390 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2391 } 2392 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2393 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2394 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2395 constraints_data = pcbddc->adaptive_constraints_data; 2396 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2397 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2398 total_counts_cc = 0; 2399 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2400 if (pcbddc->adaptive_constraints_n[i]) { 2401 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2402 } 2403 } 2404 #if 0 2405 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2406 for (i=0;i<total_counts_cc;i++) { 2407 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2408 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2409 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2410 printf(" %d",constraints_idxs[j]); 2411 } 2412 printf("\n"); 2413 printf("number of cc: %d\n",constraints_n[i]); 2414 } 2415 for (i=0;i<n_vertices;i++) { 2416 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2417 } 2418 for (i=0;i<sub_schurs->n_subs;i++) { 2419 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2420 } 2421 #endif 2422 2423 max_size_of_constraint = 0; 2424 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2425 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2426 /* Change of basis */ 2427 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2428 if (pcbddc->use_change_of_basis) { 2429 for (i=0;i<sub_schurs->n_subs;i++) { 2430 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2431 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2432 } 2433 } 2434 } 2435 } 2436 pcbddc->local_primal_size = total_counts; 2437 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2438 2439 /* map constraints_idxs in boundary numbering */ 2440 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2441 if (i != constraints_idxs_ptr[total_counts_cc]) { 2442 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2443 } 2444 2445 /* Create constraint matrix */ 2446 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2447 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2448 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2449 2450 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2451 /* determine if a QR strategy is needed for change of basis */ 2452 qr_needed = PETSC_FALSE; 2453 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2454 total_primal_vertices=0; 2455 pcbddc->local_primal_size_cc = 0; 2456 for (i=0;i<total_counts_cc;i++) { 2457 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2458 if (size_of_constraint == 1) { 2459 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2460 pcbddc->local_primal_size_cc += 1; 2461 } else if (PetscBTLookup(change_basis,i)) { 2462 for (k=0;k<constraints_n[i];k++) { 2463 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2464 } 2465 pcbddc->local_primal_size_cc += constraints_n[i]; 2466 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2467 PetscBTSet(qr_needed_idx,i); 2468 qr_needed = PETSC_TRUE; 2469 } 2470 } else { 2471 pcbddc->local_primal_size_cc += 1; 2472 } 2473 } 2474 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2475 pcbddc->n_vertices = total_primal_vertices; 2476 /* permute indices in order to have a sorted set of vertices */ 2477 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2478 2479 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2480 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2481 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2482 2483 /* nonzero structure of constraint matrix */ 2484 /* and get reference dof for local constraints */ 2485 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2486 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2487 2488 j = total_primal_vertices; 2489 total_counts = total_primal_vertices; 2490 cum = total_primal_vertices; 2491 for (i=n_vertices;i<total_counts_cc;i++) { 2492 if (!PetscBTLookup(change_basis,i)) { 2493 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2494 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2495 cum++; 2496 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2497 for (k=0;k<constraints_n[i];k++) { 2498 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2499 nnz[j+k] = size_of_constraint; 2500 } 2501 j += constraints_n[i]; 2502 } 2503 } 2504 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2505 ierr = PetscFree(nnz);CHKERRQ(ierr); 2506 2507 /* set values in constraint matrix */ 2508 for (i=0;i<total_primal_vertices;i++) { 2509 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2510 } 2511 total_counts = total_primal_vertices; 2512 for (i=n_vertices;i<total_counts_cc;i++) { 2513 if (!PetscBTLookup(change_basis,i)) { 2514 PetscInt *cols; 2515 2516 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2517 cols = constraints_idxs+constraints_idxs_ptr[i]; 2518 for (k=0;k<constraints_n[i];k++) { 2519 PetscInt row = total_counts+k; 2520 PetscScalar *vals; 2521 2522 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2523 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2524 } 2525 total_counts += constraints_n[i]; 2526 } 2527 } 2528 /* assembling */ 2529 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2530 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 2532 /* 2533 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2534 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2535 */ 2536 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2537 if (pcbddc->use_change_of_basis) { 2538 /* dual and primal dofs on a single cc */ 2539 PetscInt dual_dofs,primal_dofs; 2540 /* working stuff for GEQRF */ 2541 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2542 PetscBLASInt lqr_work; 2543 /* working stuff for UNGQR */ 2544 PetscScalar *gqr_work,lgqr_work_t; 2545 PetscBLASInt lgqr_work; 2546 /* working stuff for TRTRS */ 2547 PetscScalar *trs_rhs; 2548 PetscBLASInt Blas_NRHS; 2549 /* pointers for values insertion into change of basis matrix */ 2550 PetscInt *start_rows,*start_cols; 2551 PetscScalar *start_vals; 2552 /* working stuff for values insertion */ 2553 PetscBT is_primal; 2554 PetscInt *aux_primal_numbering_B; 2555 /* matrix sizes */ 2556 PetscInt global_size,local_size; 2557 /* temporary change of basis */ 2558 Mat localChangeOfBasisMatrix; 2559 /* extra space for debugging */ 2560 PetscScalar *dbg_work; 2561 2562 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2563 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2564 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2565 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2566 /* nonzeros for local mat */ 2567 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2568 for (i=0;i<pcis->n;i++) nnz[i]=1; 2569 for (i=n_vertices;i<total_counts_cc;i++) { 2570 if (PetscBTLookup(change_basis,i)) { 2571 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2572 if (PetscBTLookup(qr_needed_idx,i)) { 2573 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2574 } else { 2575 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2576 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2577 } 2578 } 2579 } 2580 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2581 ierr = PetscFree(nnz);CHKERRQ(ierr); 2582 /* Set initial identity in the matrix */ 2583 for (i=0;i<pcis->n;i++) { 2584 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2585 } 2586 2587 if (pcbddc->dbg_flag) { 2588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2590 } 2591 2592 2593 /* Now we loop on the constraints which need a change of basis */ 2594 /* 2595 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2596 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2597 2598 Basic blocks of change of basis matrix T computed by 2599 2600 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2601 2602 | 1 0 ... 0 s_1/S | 2603 | 0 1 ... 0 s_2/S | 2604 | ... | 2605 | 0 ... 1 s_{n-1}/S | 2606 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2607 2608 with S = \sum_{i=1}^n s_i^2 2609 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2610 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2611 2612 - QR decomposition of constraints otherwise 2613 */ 2614 if (qr_needed) { 2615 /* space to store Q */ 2616 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2617 /* first we issue queries for optimal work */ 2618 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2619 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2620 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2621 lqr_work = -1; 2622 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2623 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2624 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2625 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2626 lgqr_work = -1; 2627 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2628 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2629 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2630 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2631 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2632 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2633 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2634 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2635 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2636 /* array to store scaling factors for reflectors */ 2637 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2638 /* array to store rhs and solution of triangular solver */ 2639 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2640 /* allocating workspace for check */ 2641 if (pcbddc->dbg_flag) { 2642 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2643 } 2644 } 2645 /* array to store whether a node is primal or not */ 2646 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2647 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2648 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2649 if (i != total_primal_vertices) { 2650 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2651 } 2652 for (i=0;i<total_primal_vertices;i++) { 2653 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2654 } 2655 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2656 2657 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2658 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2659 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2660 if (PetscBTLookup(change_basis,total_counts)) { 2661 /* get constraint info */ 2662 primal_dofs = constraints_n[total_counts]; 2663 dual_dofs = size_of_constraint-primal_dofs; 2664 2665 if (pcbddc->dbg_flag) { 2666 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2667 } 2668 2669 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2670 2671 /* copy quadrature constraints for change of basis check */ 2672 if (pcbddc->dbg_flag) { 2673 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2674 } 2675 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2676 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2677 2678 /* compute QR decomposition of constraints */ 2679 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2680 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2681 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2682 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2683 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2684 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2685 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2686 2687 /* explictly compute R^-T */ 2688 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2689 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2690 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2691 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2692 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2693 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2694 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2695 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2696 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2697 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2698 2699 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2700 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2701 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2702 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2704 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2705 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2706 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2707 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2708 2709 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2710 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2711 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2717 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2718 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2719 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2721 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2722 2723 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2724 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2725 /* insert cols for primal dofs */ 2726 for (j=0;j<primal_dofs;j++) { 2727 start_vals = &qr_basis[j*size_of_constraint]; 2728 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2729 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2730 } 2731 /* insert cols for dual dofs */ 2732 for (j=0,k=0;j<dual_dofs;k++) { 2733 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2734 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2735 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2736 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2737 j++; 2738 } 2739 } 2740 2741 /* check change of basis */ 2742 if (pcbddc->dbg_flag) { 2743 PetscInt ii,jj; 2744 PetscBool valid_qr=PETSC_TRUE; 2745 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2746 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2747 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2748 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2749 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2750 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2751 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2752 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2753 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2754 for (jj=0;jj<size_of_constraint;jj++) { 2755 for (ii=0;ii<primal_dofs;ii++) { 2756 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2757 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2758 } 2759 } 2760 if (!valid_qr) { 2761 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2762 for (jj=0;jj<size_of_constraint;jj++) { 2763 for (ii=0;ii<primal_dofs;ii++) { 2764 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2765 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2766 } 2767 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2768 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2769 } 2770 } 2771 } 2772 } else { 2773 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2774 } 2775 } 2776 } else { /* simple transformation block */ 2777 PetscInt row,col; 2778 PetscScalar val,norm; 2779 2780 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2781 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2782 for (j=0;j<size_of_constraint;j++) { 2783 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2784 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2785 if (!PetscBTLookup(is_primal,row_B)) { 2786 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2787 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2788 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2789 } else { 2790 for (k=0;k<size_of_constraint;k++) { 2791 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2792 if (row != col) { 2793 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2794 } else { 2795 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2796 } 2797 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2798 } 2799 } 2800 } 2801 if (pcbddc->dbg_flag) { 2802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2803 } 2804 } 2805 } else { 2806 if (pcbddc->dbg_flag) { 2807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2808 } 2809 } 2810 } 2811 2812 /* free workspace */ 2813 if (qr_needed) { 2814 if (pcbddc->dbg_flag) { 2815 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2816 } 2817 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2818 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2819 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2820 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2821 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2822 } 2823 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2824 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2825 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2826 2827 /* assembling of global change of variable */ 2828 { 2829 Mat tmat; 2830 PetscInt bs; 2831 2832 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2833 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2834 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2835 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2836 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2837 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2838 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2839 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2840 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2841 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2842 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2843 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2844 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2845 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2846 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2847 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2848 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2849 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2850 } 2851 /* check */ 2852 if (pcbddc->dbg_flag) { 2853 PetscReal error; 2854 Vec x,x_change; 2855 2856 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2857 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2858 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2859 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2860 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2861 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2862 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2866 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2867 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2868 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2869 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2870 ierr = VecDestroy(&x);CHKERRQ(ierr); 2871 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2872 } 2873 2874 /* adapt sub_schurs computed (if any) */ 2875 if (pcbddc->use_deluxe_scaling) { 2876 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2877 if (sub_schurs->S_Ej_all) { 2878 Mat S_new,tmat; 2879 IS is_all_N; 2880 2881 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2882 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2883 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2884 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2885 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2886 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2887 sub_schurs->S_Ej_all = S_new; 2888 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2889 if (sub_schurs->sum_S_Ej_all) { 2890 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2891 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2892 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2893 sub_schurs->sum_S_Ej_all = S_new; 2894 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2895 } 2896 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2897 } 2898 } 2899 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2900 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2901 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2902 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2903 } 2904 2905 /* set up change of basis context */ 2906 if (pcbddc->ChangeOfBasisMatrix) { 2907 PCBDDCChange_ctx change_ctx; 2908 2909 if (!pcbddc->new_global_mat) { 2910 PetscInt global_size,local_size; 2911 2912 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2913 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2914 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2915 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2916 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2917 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2918 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2919 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2920 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2921 } else { 2922 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2923 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2924 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2925 } 2926 if (!pcbddc->user_ChangeOfBasisMatrix) { 2927 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2928 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2929 } else { 2930 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2931 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2932 } 2933 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2934 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2935 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2936 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2937 } 2938 2939 /* check if a new primal space has been introduced */ 2940 pcbddc->new_primal_space_local = PETSC_TRUE; 2941 if (olocal_primal_size == pcbddc->local_primal_size) { 2942 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2943 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2944 if (!pcbddc->new_primal_space_local) { 2945 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2946 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2947 } 2948 } 2949 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2950 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2951 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2952 2953 /* flush dbg viewer */ 2954 if (pcbddc->dbg_flag) { 2955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2956 } 2957 2958 /* free workspace */ 2959 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2960 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2961 if (!pcbddc->adaptive_selection) { 2962 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2963 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2964 } else { 2965 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2966 pcbddc->adaptive_constraints_idxs_ptr, 2967 pcbddc->adaptive_constraints_data_ptr, 2968 pcbddc->adaptive_constraints_idxs, 2969 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2970 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2971 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2972 } 2973 PetscFunctionReturn(0); 2974 } 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2978 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2979 { 2980 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2981 PC_IS *pcis = (PC_IS*)pc->data; 2982 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2983 PetscInt ierr,i,vertex_size,N; 2984 PetscViewer viewer=pcbddc->dbg_viewer; 2985 2986 PetscFunctionBegin; 2987 /* Reset previously computed graph */ 2988 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2989 /* Init local Graph struct */ 2990 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2991 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2992 2993 /* Check validity of the csr graph passed in by the user */ 2994 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2995 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2996 } 2997 2998 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2999 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3000 PetscInt *xadj,*adjncy; 3001 PetscInt nvtxs; 3002 PetscBool flg_row=PETSC_FALSE; 3003 3004 if (pcbddc->use_local_adj) { 3005 3006 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3007 if (flg_row) { 3008 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3009 pcbddc->computed_rowadj = PETSC_TRUE; 3010 } 3011 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3012 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3013 IS is_dummy; 3014 ISLocalToGlobalMapping l2gmap_dummy; 3015 PetscInt j,sum; 3016 PetscInt *cxadj,*cadjncy; 3017 const PetscInt *idxs; 3018 PCBDDCGraph graph; 3019 PetscBT is_on_boundary; 3020 3021 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3022 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3023 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3024 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3025 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3026 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3027 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3028 if (flg_row) { 3029 graph->xadj = xadj; 3030 graph->adjncy = adjncy; 3031 } 3032 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3033 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3034 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3035 3036 if (pcbddc->dbg_flag) { 3037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3038 for (i=0;i<graph->ncc;i++) { 3039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3040 } 3041 } 3042 3043 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3044 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3045 for (i=0;i<pcis->n_B;i++) { 3046 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3047 } 3048 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3049 3050 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3051 sum = 0; 3052 for (i=0;i<graph->ncc;i++) { 3053 PetscInt sizecc = 0; 3054 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3055 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3056 sizecc++; 3057 } 3058 } 3059 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3060 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3061 cxadj[graph->queue[j]] = sizecc; 3062 } 3063 } 3064 sum += sizecc*sizecc; 3065 } 3066 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3067 sum = 0; 3068 for (i=0;i<pcis->n;i++) { 3069 PetscInt temp = cxadj[i]; 3070 cxadj[i] = sum; 3071 sum += temp; 3072 } 3073 cxadj[pcis->n] = sum; 3074 for (i=0;i<graph->ncc;i++) { 3075 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3076 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3077 PetscInt k,sizecc = 0; 3078 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3079 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3080 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3081 sizecc++; 3082 } 3083 } 3084 } 3085 } 3086 } 3087 if (sum) { 3088 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3089 } else { 3090 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3091 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3092 } 3093 graph->xadj = 0; 3094 graph->adjncy = 0; 3095 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3096 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3097 } 3098 } 3099 if (pcbddc->dbg_flag) { 3100 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3101 } 3102 3103 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3104 vertex_size = 1; 3105 if (pcbddc->user_provided_isfordofs) { 3106 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3107 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3108 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3109 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3110 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3111 } 3112 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3113 pcbddc->n_ISForDofs = 0; 3114 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3115 } 3116 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3117 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3118 } else { 3119 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3120 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3121 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3122 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3123 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3124 } 3125 } 3126 } 3127 3128 /* Setup of Graph */ 3129 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3130 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3131 } 3132 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3133 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3134 } 3135 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3136 3137 /* Graph's connected components analysis */ 3138 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3139 3140 /* print some info to stdout */ 3141 if (pcbddc->dbg_flag) { 3142 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3143 } 3144 3145 /* mark topography has done */ 3146 pcbddc->recompute_topography = PETSC_FALSE; 3147 PetscFunctionReturn(0); 3148 } 3149 3150 #undef __FUNCT__ 3151 #define __FUNCT__ "PCBDDCSubsetNumbering" 3152 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3153 { 3154 Vec local_vec,global_vec; 3155 IS seqis,paris; 3156 VecScatter scatter_ctx; 3157 PetscScalar *array; 3158 PetscInt *temp_global_dofs; 3159 PetscScalar globalsum; 3160 PetscInt i,j,s; 3161 PetscInt nlocals,first_index,old_index,max_local,max_global; 3162 PetscMPIInt rank_prec_comm,size_prec_comm; 3163 PetscInt *dof_sizes,*dof_displs; 3164 PetscBool first_found; 3165 PetscErrorCode ierr; 3166 3167 PetscFunctionBegin; 3168 /* mpi buffers */ 3169 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3170 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3171 j = ( !rank_prec_comm ? size_prec_comm : 0); 3172 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3173 /* get maximum size of subset */ 3174 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3175 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3176 max_local = 0; 3177 for (i=0;i<n_local_dofs;i++) { 3178 if (max_local < temp_global_dofs[i] ) { 3179 max_local = temp_global_dofs[i]; 3180 } 3181 } 3182 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3183 max_global++; 3184 max_local = 0; 3185 for (i=0;i<n_local_dofs;i++) { 3186 if (max_local < local_dofs[i] ) { 3187 max_local = local_dofs[i]; 3188 } 3189 } 3190 max_local++; 3191 /* allocate workspace */ 3192 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3193 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3194 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3195 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3196 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3197 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3198 /* create scatter */ 3199 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3200 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3201 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3202 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3203 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3204 /* init array */ 3205 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3206 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3207 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3208 if (local_dofs_mult) { 3209 for (i=0;i<n_local_dofs;i++) { 3210 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3211 } 3212 } else { 3213 for (i=0;i<n_local_dofs;i++) { 3214 array[local_dofs[i]]=1.0; 3215 } 3216 } 3217 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3218 /* scatter into global vec and get total number of global dofs */ 3219 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3220 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3221 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3222 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3223 /* Fill global_vec with cumulative function for global numbering */ 3224 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3225 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3226 nlocals = 0; 3227 first_index = -1; 3228 first_found = PETSC_FALSE; 3229 for (i=0;i<s;i++) { 3230 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3231 first_found = PETSC_TRUE; 3232 first_index = i; 3233 } 3234 nlocals += (PetscInt)PetscRealPart(array[i]); 3235 } 3236 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3237 if (!rank_prec_comm) { 3238 dof_displs[0]=0; 3239 for (i=1;i<size_prec_comm;i++) { 3240 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3241 } 3242 } 3243 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3244 if (first_found) { 3245 array[first_index] += (PetscScalar)nlocals; 3246 old_index = first_index; 3247 for (i=first_index+1;i<s;i++) { 3248 if (PetscRealPart(array[i]) > 0.1) { 3249 array[i] += array[old_index]; 3250 old_index = i; 3251 } 3252 } 3253 } 3254 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3255 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3256 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3257 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3258 /* get global ordering of local dofs */ 3259 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3260 if (local_dofs_mult) { 3261 for (i=0;i<n_local_dofs;i++) { 3262 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3263 } 3264 } else { 3265 for (i=0;i<n_local_dofs;i++) { 3266 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3267 } 3268 } 3269 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3270 /* free workspace */ 3271 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3272 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3273 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3274 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3275 /* return pointer to global ordering of local dofs */ 3276 *global_numbering_subset = temp_global_dofs; 3277 PetscFunctionReturn(0); 3278 } 3279 3280 #undef __FUNCT__ 3281 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3282 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3283 { 3284 PetscInt i,j; 3285 PetscScalar *alphas; 3286 PetscErrorCode ierr; 3287 3288 PetscFunctionBegin; 3289 /* this implements stabilized Gram-Schmidt */ 3290 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3291 for (i=0;i<n;i++) { 3292 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3293 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3294 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3295 } 3296 ierr = PetscFree(alphas);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 3300 #undef __FUNCT__ 3301 #define __FUNCT__ "MatISGetSubassemblingPattern" 3302 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3303 { 3304 Mat subdomain_adj; 3305 IS new_ranks,ranks_send_to; 3306 MatPartitioning partitioner; 3307 Mat_IS *matis; 3308 PetscInt n_neighs,*neighs,*n_shared,**shared; 3309 PetscInt prank; 3310 PetscMPIInt size,rank,color; 3311 PetscInt *xadj,*adjncy,*oldranks; 3312 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3313 PetscInt i,local_size,threshold=0; 3314 PetscErrorCode ierr; 3315 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3316 PetscSubcomm subcomm; 3317 3318 PetscFunctionBegin; 3319 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3320 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3321 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3322 3323 /* Get info on mapping */ 3324 matis = (Mat_IS*)(mat->data); 3325 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3326 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3327 3328 /* build local CSR graph of subdomains' connectivity */ 3329 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3330 xadj[0] = 0; 3331 xadj[1] = PetscMax(n_neighs-1,0); 3332 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3333 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3334 3335 if (threshold) { 3336 PetscInt xadj_count = 0; 3337 for (i=1;i<n_neighs;i++) { 3338 if (n_shared[i] > threshold) { 3339 adjncy[xadj_count] = neighs[i]; 3340 adjncy_wgt[xadj_count] = n_shared[i]; 3341 xadj_count++; 3342 } 3343 } 3344 xadj[1] = xadj_count; 3345 } else { 3346 if (xadj[1]) { 3347 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3348 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3349 } 3350 } 3351 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3352 if (use_square) { 3353 for (i=0;i<xadj[1];i++) { 3354 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3355 } 3356 } 3357 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3358 3359 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3360 3361 /* 3362 Restrict work on active processes only. 3363 */ 3364 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3365 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3366 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3367 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3368 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3369 if (color) { 3370 ierr = PetscFree(xadj);CHKERRQ(ierr); 3371 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3372 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3373 } else { 3374 PetscInt coarsening_ratio; 3375 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3376 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3377 prank = rank; 3378 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3379 /* 3380 for (i=0;i<size;i++) { 3381 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3382 } 3383 */ 3384 for (i=0;i<xadj[1];i++) { 3385 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3386 } 3387 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3388 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3389 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3390 3391 /* Partition */ 3392 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3393 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3394 if (use_vwgt) { 3395 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3396 v_wgt[0] = local_size; 3397 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3398 } 3399 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3400 coarsening_ratio = size/n_subdomains; 3401 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3402 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3403 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3404 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3405 3406 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3407 if (contiguous) { 3408 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3409 } else { 3410 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3411 } 3412 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3413 /* clean up */ 3414 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3415 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3416 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3417 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3418 } 3419 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3420 3421 /* assemble parallel IS for sends */ 3422 i = 1; 3423 if (color) i=0; 3424 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3425 3426 /* get back IS */ 3427 *is_sends = ranks_send_to; 3428 PetscFunctionReturn(0); 3429 } 3430 3431 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3432 3433 #undef __FUNCT__ 3434 #define __FUNCT__ "MatISSubassemble" 3435 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3436 { 3437 Mat local_mat; 3438 Mat_IS *matis; 3439 IS is_sends_internal; 3440 PetscInt rows,cols,new_local_rows; 3441 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3442 PetscBool ismatis,isdense,newisdense,destroy_mat; 3443 ISLocalToGlobalMapping l2gmap; 3444 PetscInt* l2gmap_indices; 3445 const PetscInt* is_indices; 3446 MatType new_local_type; 3447 /* buffers */ 3448 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3449 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3450 PetscInt *recv_buffer_idxs_local; 3451 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3452 /* MPI */ 3453 MPI_Comm comm,comm_n; 3454 PetscSubcomm subcomm; 3455 PetscMPIInt n_sends,n_recvs,commsize; 3456 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3457 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3458 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3459 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3460 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3461 PetscErrorCode ierr; 3462 3463 PetscFunctionBegin; 3464 /* TODO: add missing checks */ 3465 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3466 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3467 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3468 PetscValidLogicalCollectiveInt(mat,nis,7); 3469 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3470 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3471 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3472 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3473 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3474 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3475 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3476 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3477 PetscInt mrows,mcols,mnrows,mncols; 3478 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3479 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3480 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3481 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3482 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3483 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3484 } 3485 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3486 PetscValidLogicalCollectiveInt(mat,bs,0); 3487 /* prepare IS for sending if not provided */ 3488 if (!is_sends) { 3489 PetscBool pcontig = PETSC_TRUE; 3490 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3491 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3492 } else { 3493 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3494 is_sends_internal = is_sends; 3495 } 3496 3497 /* get pointer of MATIS data */ 3498 matis = (Mat_IS*)mat->data; 3499 3500 /* get comm */ 3501 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3502 3503 /* compute number of sends */ 3504 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3505 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3506 3507 /* compute number of receives */ 3508 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3509 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3510 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3511 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3512 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3513 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3514 ierr = PetscFree(iflags);CHKERRQ(ierr); 3515 3516 /* restrict comm if requested */ 3517 subcomm = 0; 3518 destroy_mat = PETSC_FALSE; 3519 if (restrict_comm) { 3520 PetscMPIInt color,subcommsize; 3521 3522 color = 0; 3523 if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */ 3524 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3525 subcommsize = commsize - subcommsize; 3526 /* check if reuse has been requested */ 3527 if (reuse == MAT_REUSE_MATRIX) { 3528 if (*mat_n) { 3529 PetscMPIInt subcommsize2; 3530 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3531 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3532 comm_n = PetscObjectComm((PetscObject)*mat_n); 3533 } else { 3534 comm_n = PETSC_COMM_SELF; 3535 } 3536 } else { /* MAT_INITIAL_MATRIX */ 3537 PetscMPIInt rank; 3538 3539 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3540 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3541 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3542 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3543 comm_n = PetscSubcommChild(subcomm); 3544 } 3545 /* flag to destroy *mat_n if not significative */ 3546 if (color) destroy_mat = PETSC_TRUE; 3547 } else { 3548 comm_n = comm; 3549 } 3550 3551 /* prepare send/receive buffers */ 3552 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3553 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3554 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3555 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3556 if (nis) { 3557 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3558 } 3559 3560 /* Get data from local matrices */ 3561 if (!isdense) { 3562 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3563 /* TODO: See below some guidelines on how to prepare the local buffers */ 3564 /* 3565 send_buffer_vals should contain the raw values of the local matrix 3566 send_buffer_idxs should contain: 3567 - MatType_PRIVATE type 3568 - PetscInt size_of_l2gmap 3569 - PetscInt global_row_indices[size_of_l2gmap] 3570 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3571 */ 3572 } else { 3573 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3574 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3575 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3576 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3577 send_buffer_idxs[1] = i; 3578 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3579 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3580 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3581 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3582 for (i=0;i<n_sends;i++) { 3583 ilengths_vals[is_indices[i]] = len*len; 3584 ilengths_idxs[is_indices[i]] = len+2; 3585 } 3586 } 3587 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3588 /* additional is (if any) */ 3589 if (nis) { 3590 PetscMPIInt psum; 3591 PetscInt j; 3592 for (j=0,psum=0;j<nis;j++) { 3593 PetscInt plen; 3594 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3595 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3596 psum += len+1; /* indices + lenght */ 3597 } 3598 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3599 for (j=0,psum=0;j<nis;j++) { 3600 PetscInt plen; 3601 const PetscInt *is_array_idxs; 3602 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3603 send_buffer_idxs_is[psum] = plen; 3604 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3605 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3606 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3607 psum += plen+1; /* indices + lenght */ 3608 } 3609 for (i=0;i<n_sends;i++) { 3610 ilengths_idxs_is[is_indices[i]] = psum; 3611 } 3612 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3613 } 3614 3615 buf_size_idxs = 0; 3616 buf_size_vals = 0; 3617 buf_size_idxs_is = 0; 3618 for (i=0;i<n_recvs;i++) { 3619 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3620 buf_size_vals += (PetscInt)olengths_vals[i]; 3621 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3622 } 3623 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3625 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3626 3627 /* get new tags for clean communications */ 3628 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3629 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3630 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3631 3632 /* allocate for requests */ 3633 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3634 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3635 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3636 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3637 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3638 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3639 3640 /* communications */ 3641 ptr_idxs = recv_buffer_idxs; 3642 ptr_vals = recv_buffer_vals; 3643 ptr_idxs_is = recv_buffer_idxs_is; 3644 for (i=0;i<n_recvs;i++) { 3645 source_dest = onodes[i]; 3646 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3647 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3648 ptr_idxs += olengths_idxs[i]; 3649 ptr_vals += olengths_vals[i]; 3650 if (nis) { 3651 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3652 ptr_idxs_is += olengths_idxs_is[i]; 3653 } 3654 } 3655 for (i=0;i<n_sends;i++) { 3656 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3657 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3658 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3659 if (nis) { 3660 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3661 } 3662 } 3663 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3664 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3665 3666 /* assemble new l2g map */ 3667 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3668 ptr_idxs = recv_buffer_idxs; 3669 new_local_rows = 0; 3670 for (i=0;i<n_recvs;i++) { 3671 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3672 ptr_idxs += olengths_idxs[i]; 3673 } 3674 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3675 ptr_idxs = recv_buffer_idxs; 3676 new_local_rows = 0; 3677 for (i=0;i<n_recvs;i++) { 3678 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3679 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3680 ptr_idxs += olengths_idxs[i]; 3681 } 3682 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3683 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3684 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3685 3686 /* infer new local matrix type from received local matrices type */ 3687 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3688 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3689 if (n_recvs) { 3690 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3691 ptr_idxs = recv_buffer_idxs; 3692 for (i=0;i<n_recvs;i++) { 3693 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3694 new_local_type_private = MATAIJ_PRIVATE; 3695 break; 3696 } 3697 ptr_idxs += olengths_idxs[i]; 3698 } 3699 switch (new_local_type_private) { 3700 case MATDENSE_PRIVATE: 3701 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3702 new_local_type = MATSEQAIJ; 3703 bs = 1; 3704 } else { /* if I receive only 1 dense matrix */ 3705 new_local_type = MATSEQDENSE; 3706 bs = 1; 3707 } 3708 break; 3709 case MATAIJ_PRIVATE: 3710 new_local_type = MATSEQAIJ; 3711 bs = 1; 3712 break; 3713 case MATBAIJ_PRIVATE: 3714 new_local_type = MATSEQBAIJ; 3715 break; 3716 case MATSBAIJ_PRIVATE: 3717 new_local_type = MATSEQSBAIJ; 3718 break; 3719 default: 3720 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3721 break; 3722 } 3723 } else { /* by default, new_local_type is seqdense */ 3724 new_local_type = MATSEQDENSE; 3725 bs = 1; 3726 } 3727 3728 /* create MATIS object if needed */ 3729 if (reuse == MAT_INITIAL_MATRIX) { 3730 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3731 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3732 } else { 3733 /* it also destroys the local matrices */ 3734 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3735 } 3736 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3737 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3738 3739 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3740 3741 /* Global to local map of received indices */ 3742 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3743 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3744 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3745 3746 /* restore attributes -> type of incoming data and its size */ 3747 buf_size_idxs = 0; 3748 for (i=0;i<n_recvs;i++) { 3749 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3750 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3751 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3752 } 3753 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3754 3755 /* set preallocation */ 3756 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3757 if (!newisdense) { 3758 PetscInt *new_local_nnz=0; 3759 3760 ptr_vals = recv_buffer_vals; 3761 ptr_idxs = recv_buffer_idxs_local; 3762 if (n_recvs) { 3763 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3764 } 3765 for (i=0;i<n_recvs;i++) { 3766 PetscInt j; 3767 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3768 for (j=0;j<*(ptr_idxs+1);j++) { 3769 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3770 } 3771 } else { 3772 /* TODO */ 3773 } 3774 ptr_idxs += olengths_idxs[i]; 3775 } 3776 if (new_local_nnz) { 3777 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3778 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3779 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3780 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3781 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3782 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3783 } else { 3784 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3785 } 3786 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3787 } else { 3788 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3789 } 3790 3791 /* set values */ 3792 ptr_vals = recv_buffer_vals; 3793 ptr_idxs = recv_buffer_idxs_local; 3794 for (i=0;i<n_recvs;i++) { 3795 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3796 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3797 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3798 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3799 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3800 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3801 } else { 3802 /* TODO */ 3803 } 3804 ptr_idxs += olengths_idxs[i]; 3805 ptr_vals += olengths_vals[i]; 3806 } 3807 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3808 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3809 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3812 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3813 3814 #if 0 3815 if (!restrict_comm) { /* check */ 3816 Vec lvec,rvec; 3817 PetscReal infty_error; 3818 3819 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3820 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3821 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3822 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3823 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3824 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3825 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3826 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3827 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3828 } 3829 #endif 3830 3831 /* assemble new additional is (if any) */ 3832 if (nis) { 3833 PetscInt **temp_idxs,*count_is,j,psum; 3834 3835 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3836 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3837 ptr_idxs = recv_buffer_idxs_is; 3838 psum = 0; 3839 for (i=0;i<n_recvs;i++) { 3840 for (j=0;j<nis;j++) { 3841 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3842 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3843 psum += plen; 3844 ptr_idxs += plen+1; /* shift pointer to received data */ 3845 } 3846 } 3847 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3848 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3849 for (i=1;i<nis;i++) { 3850 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3851 } 3852 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3853 ptr_idxs = recv_buffer_idxs_is; 3854 for (i=0;i<n_recvs;i++) { 3855 for (j=0;j<nis;j++) { 3856 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3857 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3858 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3859 ptr_idxs += plen+1; /* shift pointer to received data */ 3860 } 3861 } 3862 for (i=0;i<nis;i++) { 3863 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3864 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3865 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3866 } 3867 ierr = PetscFree(count_is);CHKERRQ(ierr); 3868 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3869 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3870 } 3871 /* free workspace */ 3872 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3873 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3874 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3875 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3876 if (isdense) { 3877 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3878 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3879 } else { 3880 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3881 } 3882 if (nis) { 3883 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3884 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3885 } 3886 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3887 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3888 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3889 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3890 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3891 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3892 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3893 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3894 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3895 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3896 ierr = PetscFree(onodes);CHKERRQ(ierr); 3897 if (nis) { 3898 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3899 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3900 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3901 } 3902 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3903 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3904 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3905 for (i=0;i<nis;i++) { 3906 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3907 } 3908 } 3909 PetscFunctionReturn(0); 3910 } 3911 3912 /* temporary hack into ksp private data structure */ 3913 #include <petsc/private/kspimpl.h> 3914 3915 #undef __FUNCT__ 3916 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3917 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3918 { 3919 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3920 PC_IS *pcis = (PC_IS*)pc->data; 3921 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3922 MatNullSpace CoarseNullSpace=NULL; 3923 ISLocalToGlobalMapping coarse_islg; 3924 IS coarse_is,*isarray; 3925 PetscInt i,im_active=-1,active_procs=-1; 3926 PetscInt nis,nisdofs,nisneu; 3927 PC pc_temp; 3928 PCType coarse_pc_type; 3929 KSPType coarse_ksp_type; 3930 PetscBool multilevel_requested,multilevel_allowed; 3931 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3932 Mat t_coarse_mat_is; 3933 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3934 PetscMPIInt all_procs; 3935 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3936 PetscBool compute_vecs = PETSC_FALSE; 3937 PetscScalar *array; 3938 PetscErrorCode ierr; 3939 3940 PetscFunctionBegin; 3941 /* Assign global numbering to coarse dofs */ 3942 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3943 PetscInt ocoarse_size; 3944 compute_vecs = PETSC_TRUE; 3945 ocoarse_size = pcbddc->coarse_size; 3946 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3947 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3948 /* see if we can avoid some work */ 3949 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3950 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3951 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3952 coarse_reuse = PETSC_FALSE; 3953 } else { /* we can safely reuse already computed coarse matrix */ 3954 coarse_reuse = PETSC_TRUE; 3955 } 3956 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3957 coarse_reuse = PETSC_FALSE; 3958 } 3959 /* reset any subassembling information */ 3960 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3961 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3962 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3963 coarse_reuse = PETSC_TRUE; 3964 } 3965 3966 /* count "active" (i.e. with positive local size) and "void" processes */ 3967 im_active = !!(pcis->n); 3968 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3969 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3970 void_procs = all_procs-active_procs; 3971 csin_type_simple = PETSC_TRUE; 3972 redist = PETSC_FALSE; 3973 if (pcbddc->current_level && void_procs) { 3974 csin_ml = PETSC_TRUE; 3975 ncoarse_ml = void_procs; 3976 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3977 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3978 csin_ds = PETSC_TRUE; 3979 ncoarse_ds = pcbddc->redistribute_coarse; 3980 redist = PETSC_TRUE; 3981 } else { 3982 csin_ds = PETSC_TRUE; 3983 ncoarse_ds = active_procs; 3984 redist = PETSC_TRUE; 3985 } 3986 } else { 3987 csin_ml = PETSC_FALSE; 3988 ncoarse_ml = all_procs; 3989 if (void_procs) { 3990 csin_ds = PETSC_TRUE; 3991 ncoarse_ds = void_procs; 3992 csin_type_simple = PETSC_FALSE; 3993 } else { 3994 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3995 csin_ds = PETSC_TRUE; 3996 ncoarse_ds = pcbddc->redistribute_coarse; 3997 redist = PETSC_TRUE; 3998 } else { 3999 csin_ds = PETSC_FALSE; 4000 ncoarse_ds = all_procs; 4001 } 4002 } 4003 } 4004 4005 /* 4006 test if we can go multilevel: three conditions must be satisfied: 4007 - we have not exceeded the number of levels requested 4008 - we can actually subassemble the active processes 4009 - we can find a suitable number of MPI processes where we can place the subassembled problem 4010 */ 4011 multilevel_allowed = PETSC_FALSE; 4012 multilevel_requested = PETSC_FALSE; 4013 if (pcbddc->current_level < pcbddc->max_levels) { 4014 multilevel_requested = PETSC_TRUE; 4015 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4016 multilevel_allowed = PETSC_FALSE; 4017 } else { 4018 multilevel_allowed = PETSC_TRUE; 4019 } 4020 } 4021 /* determine number of process partecipating to coarse solver */ 4022 if (multilevel_allowed) { 4023 ncoarse = ncoarse_ml; 4024 csin = csin_ml; 4025 redist = PETSC_FALSE; 4026 } else { 4027 ncoarse = ncoarse_ds; 4028 csin = csin_ds; 4029 } 4030 4031 /* creates temporary l2gmap and IS for coarse indexes */ 4032 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4033 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4034 4035 /* creates temporary MATIS object for coarse matrix */ 4036 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4037 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4038 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4039 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4040 #if 0 4041 { 4042 PetscViewer viewer; 4043 char filename[256]; 4044 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4045 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4046 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4047 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4048 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4049 } 4050 #endif 4051 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4052 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4053 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4054 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4055 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4056 4057 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4058 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4059 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4060 const PetscInt *idxs; 4061 ISLocalToGlobalMapping tmap; 4062 4063 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4064 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4065 /* allocate space for temporary storage */ 4066 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4067 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4068 /* allocate for IS array */ 4069 nisdofs = pcbddc->n_ISForDofsLocal; 4070 nisneu = !!pcbddc->NeumannBoundariesLocal; 4071 nis = nisdofs + nisneu; 4072 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4073 /* dofs splitting */ 4074 for (i=0;i<nisdofs;i++) { 4075 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4076 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4077 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4078 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4079 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4080 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4081 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4082 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4083 } 4084 /* neumann boundaries */ 4085 if (pcbddc->NeumannBoundariesLocal) { 4086 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4087 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4088 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4089 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4090 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4091 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4093 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4094 } 4095 /* free memory */ 4096 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4097 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4098 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4099 } else { 4100 nis = 0; 4101 nisdofs = 0; 4102 nisneu = 0; 4103 isarray = NULL; 4104 } 4105 /* destroy no longer needed map */ 4106 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4107 4108 /* restrict on coarse candidates (if needed) */ 4109 coarse_mat_is = NULL; 4110 if (csin) { 4111 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4112 if (redist) { 4113 PetscMPIInt rank; 4114 PetscInt spc,n_spc_p1,dest[1],destsize; 4115 4116 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4117 spc = active_procs/ncoarse; 4118 n_spc_p1 = active_procs%ncoarse; 4119 if (im_active) { 4120 destsize = 1; 4121 if (rank > n_spc_p1*(spc+1)-1) { 4122 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4123 } else { 4124 dest[0] = rank/(spc+1); 4125 } 4126 } else { 4127 destsize = 0; 4128 } 4129 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4130 } else if (csin_type_simple) { 4131 PetscMPIInt rank; 4132 PetscInt issize,isidx; 4133 4134 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4135 if (im_active) { 4136 issize = 1; 4137 isidx = (PetscInt)rank; 4138 } else { 4139 issize = 0; 4140 isidx = -1; 4141 } 4142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4143 } else { /* get a suitable subassembling pattern from MATIS code */ 4144 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4145 } 4146 4147 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4148 if (!redist || ncoarse <= void_procs) { 4149 PetscInt ncoarse_cand,tissize,*nisindices; 4150 PetscInt *coarse_candidates; 4151 const PetscInt* tisindices; 4152 4153 /* get coarse candidates' ranks in pc communicator */ 4154 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4155 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4156 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4157 if (!coarse_candidates[i]) { 4158 coarse_candidates[ncoarse_cand++]=i; 4159 } 4160 } 4161 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4162 4163 4164 if (pcbddc->dbg_flag) { 4165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4167 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4169 for (i=0;i<ncoarse_cand;i++) { 4170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4171 } 4172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4174 } 4175 /* shift the pattern on coarse candidates */ 4176 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4177 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4178 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4179 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4180 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4181 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4182 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4183 } 4184 if (pcbddc->dbg_flag) { 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4187 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4189 } 4190 } 4191 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4192 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4193 } else { 4194 if (pcbddc->dbg_flag) { 4195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4198 } 4199 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4200 coarse_mat_is = t_coarse_mat_is; 4201 } 4202 4203 /* create local to global scatters for coarse problem */ 4204 if (compute_vecs) { 4205 PetscInt lrows; 4206 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4207 if (coarse_mat_is) { 4208 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4209 } else { 4210 lrows = 0; 4211 } 4212 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4213 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4214 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4215 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4216 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4217 } 4218 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4219 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4220 4221 /* set defaults for coarse KSP and PC */ 4222 if (multilevel_allowed) { 4223 coarse_ksp_type = KSPRICHARDSON; 4224 coarse_pc_type = PCBDDC; 4225 } else { 4226 coarse_ksp_type = KSPPREONLY; 4227 coarse_pc_type = PCREDUNDANT; 4228 } 4229 4230 /* print some info if requested */ 4231 if (pcbddc->dbg_flag) { 4232 if (!multilevel_allowed) { 4233 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4234 if (multilevel_requested) { 4235 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4236 } else if (pcbddc->max_levels) { 4237 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4238 } 4239 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4240 } 4241 } 4242 4243 /* create the coarse KSP object only once with defaults */ 4244 if (coarse_mat_is) { 4245 MatReuse coarse_mat_reuse; 4246 PetscViewer dbg_viewer = NULL; 4247 if (pcbddc->dbg_flag) { 4248 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4249 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4250 } 4251 if (!pcbddc->coarse_ksp) { 4252 char prefix[256],str_level[16]; 4253 size_t len; 4254 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4255 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4256 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4257 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4258 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4259 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4260 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4261 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4262 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4263 /* prefix */ 4264 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4265 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4266 if (!pcbddc->current_level) { 4267 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4268 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4269 } else { 4270 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4271 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4272 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4273 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4274 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4275 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4276 } 4277 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4278 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4279 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4280 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4281 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4282 /* allow user customization */ 4283 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4284 } 4285 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4286 if (nisdofs) { 4287 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4288 for (i=0;i<nisdofs;i++) { 4289 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4290 } 4291 } 4292 if (nisneu) { 4293 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4294 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4295 } 4296 4297 /* get some info after set from options */ 4298 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4299 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4300 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4301 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4302 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4303 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4304 isbddc = PETSC_FALSE; 4305 } 4306 if (isredundant) { 4307 KSP inner_ksp; 4308 PC inner_pc; 4309 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4310 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4311 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4312 } 4313 4314 /* assemble coarse matrix */ 4315 if (coarse_reuse) { 4316 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4317 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4318 coarse_mat_reuse = MAT_REUSE_MATRIX; 4319 } else { 4320 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4321 } 4322 if (isbddc || isnn) { 4323 if (pcbddc->coarsening_ratio > 1) { 4324 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4325 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4326 if (pcbddc->dbg_flag) { 4327 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4328 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4329 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4330 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4331 } 4332 } 4333 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4334 } else { 4335 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4336 coarse_mat = coarse_mat_is; 4337 } 4338 } else { 4339 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4340 } 4341 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4342 4343 /* propagate symmetry info to coarse matrix */ 4344 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4345 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4346 4347 /* set operators */ 4348 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4349 if (pcbddc->dbg_flag) { 4350 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4351 } 4352 } else { /* processes non partecipating to coarse solver (if any) */ 4353 coarse_mat = 0; 4354 } 4355 ierr = PetscFree(isarray);CHKERRQ(ierr); 4356 #if 0 4357 { 4358 PetscViewer viewer; 4359 char filename[256]; 4360 sprintf(filename,"coarse_mat.m"); 4361 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4362 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4363 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4364 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4365 } 4366 #endif 4367 4368 /* Compute coarse null space (special handling by BDDC only) */ 4369 if (pcbddc->NullSpace) { 4370 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4371 } 4372 4373 if (pcbddc->coarse_ksp) { 4374 Vec crhs,csol; 4375 PetscBool ispreonly; 4376 if (CoarseNullSpace) { 4377 if (isbddc) { 4378 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4379 } else { 4380 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4381 } 4382 } 4383 /* setup coarse ksp */ 4384 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4385 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4386 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4387 /* hack */ 4388 if (!csol) { 4389 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4390 } 4391 if (!crhs) { 4392 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4393 } 4394 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4395 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4396 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4397 KSP check_ksp; 4398 KSPType check_ksp_type; 4399 PC check_pc; 4400 Vec check_vec,coarse_vec; 4401 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4402 PetscInt its; 4403 PetscBool compute_eigs; 4404 PetscReal *eigs_r,*eigs_c; 4405 PetscInt neigs; 4406 const char *prefix; 4407 4408 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4409 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4410 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4411 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4412 if (ispreonly) { 4413 check_ksp_type = KSPPREONLY; 4414 compute_eigs = PETSC_FALSE; 4415 } else { 4416 check_ksp_type = KSPGMRES; 4417 compute_eigs = PETSC_TRUE; 4418 } 4419 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4420 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4421 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4422 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4423 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4424 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4425 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4426 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4427 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4428 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4429 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4430 /* create random vec */ 4431 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4432 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4433 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4434 if (CoarseNullSpace) { 4435 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4436 } 4437 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4438 /* solve coarse problem */ 4439 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4440 if (CoarseNullSpace) { 4441 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4442 } 4443 /* set eigenvalue estimation if preonly has not been requested */ 4444 if (compute_eigs) { 4445 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4446 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4447 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4448 lambda_max = eigs_r[neigs-1]; 4449 lambda_min = eigs_r[0]; 4450 if (pcbddc->use_coarse_estimates) { 4451 if (lambda_max>lambda_min) { 4452 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4453 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4454 } 4455 } 4456 } 4457 4458 /* check coarse problem residual error */ 4459 if (pcbddc->dbg_flag) { 4460 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4461 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4462 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4463 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4464 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4465 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4466 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4467 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4468 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4469 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4470 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4471 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4472 if (compute_eigs) { 4473 PetscReal lambda_max_s,lambda_min_s; 4474 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4475 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4476 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4477 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4478 for (i=0;i<neigs;i++) { 4479 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4480 } 4481 } 4482 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4484 } 4485 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4486 if (compute_eigs) { 4487 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4488 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4489 } 4490 } 4491 } 4492 /* print additional info */ 4493 if (pcbddc->dbg_flag) { 4494 /* waits until all processes reaches this point */ 4495 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4496 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4497 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4498 } 4499 4500 /* free memory */ 4501 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4502 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4503 PetscFunctionReturn(0); 4504 } 4505 4506 #undef __FUNCT__ 4507 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4508 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4509 { 4510 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4511 PC_IS* pcis = (PC_IS*)pc->data; 4512 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4513 PetscInt i,local_size,coarse_size=0; 4514 PetscInt *local_primal_indices=NULL; 4515 PetscInt *t_local_primal_indices=NULL; 4516 PetscErrorCode ierr; 4517 4518 PetscFunctionBegin; 4519 /* Compute global number of coarse dofs */ 4520 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4521 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4522 } 4523 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4524 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4525 local_size = 0; 4526 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4527 PetscInt j; 4528 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4529 } 4530 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4531 if (local_size != pcbddc->local_primal_size) { 4532 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4533 } 4534 4535 /* check numbering */ 4536 if (pcbddc->dbg_flag) { 4537 PetscScalar coarsesum,*array; 4538 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4539 4540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4541 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4543 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4544 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4545 for (i=0;i<pcbddc->local_primal_size;i++) { 4546 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4547 } 4548 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4549 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4550 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4551 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4552 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4553 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4554 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4555 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4556 for (i=0;i<pcis->n;i++) { 4557 if (array[i] == 1.0) { 4558 set_error = PETSC_TRUE; 4559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4560 } 4561 } 4562 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4564 for (i=0;i<pcis->n;i++) { 4565 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4566 } 4567 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4568 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4569 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4570 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4571 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4573 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4574 PetscInt *gidxs; 4575 4576 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4577 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4578 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4581 for (i=0;i<pcbddc->local_primal_size;i++) { 4582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4583 } 4584 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4585 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4586 } 4587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4588 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4589 } 4590 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4591 /* get back data */ 4592 *coarse_size_n = coarse_size; 4593 *local_primal_indices_n = local_primal_indices; 4594 PetscFunctionReturn(0); 4595 } 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "PCBDDCGlobalToLocal" 4599 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4600 { 4601 IS localis_t; 4602 PetscInt i,lsize,*idxs,n; 4603 PetscScalar *vals; 4604 PetscErrorCode ierr; 4605 4606 PetscFunctionBegin; 4607 /* get indices in local ordering exploiting local to global map */ 4608 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4609 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4610 for (i=0;i<lsize;i++) vals[i] = 1.0; 4611 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4612 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4613 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4614 if (idxs) { /* multilevel guard */ 4615 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4616 } 4617 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4618 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4619 ierr = PetscFree(vals);CHKERRQ(ierr); 4620 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4621 /* now compute set in local ordering */ 4622 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4623 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4624 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4625 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4626 for (i=0,lsize=0;i<n;i++) { 4627 if (PetscRealPart(vals[i]) > 0.5) { 4628 lsize++; 4629 } 4630 } 4631 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4632 for (i=0,lsize=0;i<n;i++) { 4633 if (PetscRealPart(vals[i]) > 0.5) { 4634 idxs[lsize++] = i; 4635 } 4636 } 4637 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4638 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4639 *localis = localis_t; 4640 PetscFunctionReturn(0); 4641 } 4642 4643 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "PCBDDCMatMult_Private" 4646 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4647 { 4648 PCBDDCChange_ctx change_ctx; 4649 PetscErrorCode ierr; 4650 4651 PetscFunctionBegin; 4652 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4653 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4654 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4655 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4656 PetscFunctionReturn(0); 4657 } 4658 4659 #undef __FUNCT__ 4660 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4661 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4662 { 4663 PCBDDCChange_ctx change_ctx; 4664 PetscErrorCode ierr; 4665 4666 PetscFunctionBegin; 4667 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4668 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4669 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4670 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4671 PetscFunctionReturn(0); 4672 } 4673 4674 #undef __FUNCT__ 4675 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4676 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4677 { 4678 PC_IS *pcis=(PC_IS*)pc->data; 4679 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4680 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4681 Mat S_j; 4682 PetscInt *used_xadj,*used_adjncy; 4683 PetscBool free_used_adj; 4684 PetscErrorCode ierr; 4685 4686 PetscFunctionBegin; 4687 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4688 free_used_adj = PETSC_FALSE; 4689 if (pcbddc->sub_schurs_layers == -1) { 4690 used_xadj = NULL; 4691 used_adjncy = NULL; 4692 } else { 4693 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4694 used_xadj = pcbddc->mat_graph->xadj; 4695 used_adjncy = pcbddc->mat_graph->adjncy; 4696 } else if (pcbddc->computed_rowadj) { 4697 used_xadj = pcbddc->mat_graph->xadj; 4698 used_adjncy = pcbddc->mat_graph->adjncy; 4699 } else { 4700 PetscBool flg_row=PETSC_FALSE; 4701 const PetscInt *xadj,*adjncy; 4702 PetscInt nvtxs; 4703 4704 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4705 if (flg_row) { 4706 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4707 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4708 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4709 free_used_adj = PETSC_TRUE; 4710 } else { 4711 pcbddc->sub_schurs_layers = -1; 4712 used_xadj = NULL; 4713 used_adjncy = NULL; 4714 } 4715 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4716 } 4717 } 4718 4719 /* setup sub_schurs data */ 4720 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4721 if (!sub_schurs->use_mumps) { 4722 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4723 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4724 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4725 } else { 4726 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 4727 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4728 } 4729 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4730 4731 /* free adjacency */ 4732 if (free_used_adj) { 4733 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4734 } 4735 PetscFunctionReturn(0); 4736 } 4737 4738 #undef __FUNCT__ 4739 #define __FUNCT__ "PCBDDCInitSubSchurs" 4740 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4741 { 4742 PC_IS *pcis=(PC_IS*)pc->data; 4743 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4744 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4745 PCBDDCGraph graph; 4746 PetscErrorCode ierr; 4747 4748 PetscFunctionBegin; 4749 /* attach interface graph for determining subsets */ 4750 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4751 IS verticesIS; 4752 4753 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4754 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4755 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4756 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4757 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4758 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4759 /* 4760 if (pcbddc->dbg_flag) { 4761 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4762 } 4763 */ 4764 } else { 4765 graph = pcbddc->mat_graph; 4766 } 4767 4768 /* sub_schurs init */ 4769 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4770 4771 /* free graph struct */ 4772 if (pcbddc->sub_schurs_rebuild) { 4773 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4774 } 4775 PetscFunctionReturn(0); 4776 } 4777