1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 if (pcbddc->local_auxmat2) { 434 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 435 ierr = PetscFree(array);CHKERRQ(ierr); 436 } 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 451 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 458 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 459 { 460 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 461 PC_IS *pcis = (PC_IS*)pc->data; 462 VecType impVecType; 463 PetscInt n_constraints,n_R,old_size; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 if (!pcbddc->ConstraintMatrix) { 468 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 } 470 /* get sizes */ 471 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 472 n_R = pcis->n-pcbddc->n_vertices; 473 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 474 /* local work vectors (try to avoid unneeded work)*/ 475 /* R nodes */ 476 old_size = -1; 477 if (pcbddc->vec1_R) { 478 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 479 } 480 if (n_R != old_size) { 481 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 482 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 483 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 484 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 485 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 486 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 487 } 488 /* local primal dofs */ 489 old_size = -1; 490 if (pcbddc->vec1_P) { 491 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 492 } 493 if (pcbddc->local_primal_size != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 496 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 497 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 498 } 499 /* local explicit constraints */ 500 old_size = -1; 501 if (pcbddc->vec1_C) { 502 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 503 } 504 if (n_constraints && n_constraints != old_size) { 505 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 507 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 508 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 509 } 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "PCBDDCSetUpCorrection" 515 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 516 { 517 PetscErrorCode ierr; 518 /* pointers to pcis and pcbddc */ 519 PC_IS* pcis = (PC_IS*)pc->data; 520 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) { 558 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 559 } 560 561 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 562 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 570 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 571 572 unsymmetric_check = PETSC_FALSE; 573 /* allocate workspace */ 574 n = 0; 575 if (n_constraints) { 576 n += n_R*n_constraints; 577 } 578 if (n_vertices) { 579 n = PetscMax(2*n_R*n_vertices,n); 580 } 581 if (!pcbddc->issym) { 582 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 583 unsymmetric_check = PETSC_TRUE; 584 } 585 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 586 587 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 588 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 591 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 592 if (isLU || isILU || isCHOL) { 593 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 594 } else { 595 F = NULL; 596 } 597 598 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 599 if (n_constraints) { 600 Mat M1,M2,M3; 601 IS is_aux; 602 /* see if we can save some allocations */ 603 if (pcbddc->local_auxmat2) { 604 PetscInt on_R,on_constraints; 605 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 606 if (on_R != n_R || on_constraints != n_constraints) { 607 PetscScalar *marray; 608 609 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 610 ierr = PetscFree(marray);CHKERRQ(ierr); 611 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 613 } 614 } 615 /* auxiliary matrices */ 616 if (!pcbddc->local_auxmat2) { 617 PetscScalar *marray; 618 619 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 621 marray += n_R*n_constraints; 622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 623 } 624 625 /* Extract constraints on R nodes: C_{CR} */ 626 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 627 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 628 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 629 630 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 631 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 632 for (i=0;i<n_constraints;i++) { 633 const PetscScalar *row_cmat_values; 634 const PetscInt *row_cmat_indices; 635 PetscInt size_of_constraint,j; 636 637 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 638 for (j=0;j<size_of_constraint;j++) { 639 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 640 } 641 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 642 } 643 if (F) { 644 Mat B; 645 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 647 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 648 ierr = MatDestroy(&B);CHKERRQ(ierr); 649 } else { 650 PetscScalar *xarray; 651 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 652 for (i=0;i<n_constraints;i++) { 653 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 654 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 655 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 657 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 658 } 659 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 660 } 661 662 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 663 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 664 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 665 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 666 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 667 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 668 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 669 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 670 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 671 ierr = MatDestroy(&M2);CHKERRQ(ierr); 672 ierr = MatDestroy(&M3);CHKERRQ(ierr); 673 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 674 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 675 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 676 ierr = MatDestroy(&M1);CHKERRQ(ierr); 677 } 678 /* Get submatrices from subdomain matrix */ 679 if (n_vertices) { 680 Mat newmat; 681 IS is_aux; 682 PetscInt ibs,mbs; 683 PetscBool issbaij; 684 685 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 686 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 687 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 688 if (ibs != mbs) { /* need to convert to SEQAIJ */ 689 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 690 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 691 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 693 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 694 } else { 695 /* this is safe */ 696 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 698 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 699 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 700 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 701 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 702 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 703 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 704 } else { 705 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 706 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 707 } 708 } 709 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 710 } 711 712 /* Matrix of coarse basis functions (local) */ 713 if (pcbddc->coarse_phi_B) { 714 PetscInt on_B,on_primal,on_D=n_D; 715 if (pcbddc->coarse_phi_D) { 716 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 717 } 718 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 719 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 720 PetscScalar *marray; 721 722 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 723 ierr = PetscFree(marray);CHKERRQ(ierr); 724 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 725 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 728 } 729 } 730 731 if (!pcbddc->coarse_phi_B) { 732 PetscScalar *marray; 733 734 n = n_B*pcbddc->local_primal_size; 735 if (pcbddc->switch_static || pcbddc->dbg_flag) { 736 n += n_D*pcbddc->local_primal_size; 737 } 738 if (!pcbddc->issym) { 739 n *= 2; 740 } 741 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 n = n_B*pcbddc->local_primal_size; 744 if (pcbddc->switch_static || pcbddc->dbg_flag) { 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 746 n += n_D*pcbddc->local_primal_size; 747 } 748 if (!pcbddc->issym) { 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 750 if (pcbddc->switch_static || pcbddc->dbg_flag) { 751 n = n_B*pcbddc->local_primal_size; 752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 753 } 754 } else { 755 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 756 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 757 if (pcbddc->switch_static || pcbddc->dbg_flag) { 758 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 759 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 760 } 761 } 762 } 763 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 764 /* vertices */ 765 if (n_vertices) { 766 767 if (n_R) { 768 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 769 PetscBLASInt B_N,B_one = 1; 770 PetscScalar *x,*y; 771 772 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 774 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 775 if (F) { 776 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 777 } else { 778 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 779 for (i=0;i<n_vertices;i++) { 780 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 781 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 782 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 783 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 784 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 785 } 786 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 787 } 788 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 789 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 790 if (n_constraints) { 791 Mat B; 792 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 793 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 794 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 795 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 796 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 797 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 798 ierr = MatDestroy(&B);CHKERRQ(ierr); 799 } 800 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 801 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 802 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 803 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 804 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 805 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 806 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 807 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 809 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 810 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 811 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 812 } else { 813 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 814 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 815 } 816 /* coarse basis functions */ 817 for (i=0;i<n_vertices;i++) { 818 PetscScalar *y; 819 820 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 821 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 822 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 823 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 y[n_B*i+idx_V_B[i]] = 1.0; 826 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 827 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 828 829 if (pcbddc->switch_static || pcbddc->dbg_flag) { 830 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 831 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 836 } 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 840 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 841 } 842 843 if (n_constraints) { 844 Mat B; 845 846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 847 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 848 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 849 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 850 if (n_vertices) { 851 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 852 } 853 ierr = MatDestroy(&B);CHKERRQ(ierr); 854 /* coarse basis functions */ 855 for (i=0;i<n_constraints;i++) { 856 PetscScalar *y; 857 858 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 864 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 865 866 if (pcbddc->switch_static || pcbddc->dbg_flag) { 867 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 869 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 872 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 873 } 874 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 875 } 876 } 877 878 /* compute other basis functions for non-symmetric problems */ 879 if (!pcbddc->issym) { 880 Mat B,X; 881 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 883 884 if (n_constraints) { 885 Mat S_CCT,B_C; 886 887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 888 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 889 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 891 if (n_vertices) { 892 Mat B_V,S_VCT; 893 894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 895 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 896 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 897 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 898 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 899 } 900 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 901 } 902 if (n_vertices && n_R) { 903 Mat A_VRT; 904 PetscBLASInt B_N,B_one = 1; 905 906 if (!n_constraints) { /* if there are no constraints, reset work */ 907 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 908 } 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 910 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 911 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 912 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 913 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 914 } 915 916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 917 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 918 for (i=0;i<pcbddc->local_primal_size;i++) { 919 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 921 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 924 } 925 } else { 926 for (i=0;i<pcbddc->local_primal_size;i++) { 927 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 928 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 929 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 932 } 933 } 934 ierr = MatDestroy(&B);CHKERRQ(ierr); 935 /* coarse basis functions */ 936 for (i=0;i<pcbddc->local_primal_size;i++) { 937 PetscScalar *y; 938 939 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 940 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 941 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 942 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 if (i<n_vertices) { 945 y[n_B*i+idx_V_B[i]] = 1.0; 946 } 947 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 948 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 949 950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 951 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 952 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 953 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 956 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 957 } 958 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 959 } 960 ierr = MatDestroy(&X);CHKERRQ(ierr); 961 } 962 963 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 964 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 965 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 966 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 967 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 968 /* Checking coarse_sub_mat and coarse basis functios */ 969 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 970 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 971 if (pcbddc->dbg_flag) { 972 Mat coarse_sub_mat; 973 Mat AUXMAT,TM1,TM2,TM3,TM4; 974 Mat coarse_phi_D,coarse_phi_B; 975 Mat coarse_psi_D,coarse_psi_B; 976 Mat A_II,A_BB,A_IB,A_BI; 977 Mat C_B,CPHI; 978 IS is_dummy; 979 Vec mones; 980 MatType checkmattype=MATSEQAIJ; 981 PetscReal real_value; 982 983 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 984 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 985 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 986 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 987 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 988 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 989 if (unsymmetric_check) { 990 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 991 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 992 } 993 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 994 995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 998 if (unsymmetric_check) { 999 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1001 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1003 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1004 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1005 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1006 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1007 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1008 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1010 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1011 } else { 1012 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1013 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1014 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1015 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1016 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1017 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1018 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1019 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1020 } 1021 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1022 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1023 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1024 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1025 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1026 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1027 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1029 1030 /* check constraints */ 1031 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1032 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1033 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1034 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1035 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1036 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1037 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1039 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1040 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1041 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1042 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1043 1044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1045 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1046 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1047 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1049 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1050 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1051 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1052 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1054 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1055 if (unsymmetric_check) { 1056 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1057 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1058 } 1059 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1060 } 1061 1062 /* free memory */ 1063 ierr = PetscFree(work);CHKERRQ(ierr); 1064 if (n_vertices) { 1065 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1066 } 1067 if (n_constraints) { 1068 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1069 } 1070 /* get back data */ 1071 *coarse_submat_vals_n = coarse_submat_vals; 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1077 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1078 { 1079 Mat *work_mat; 1080 IS isrow_s,iscol_s; 1081 PetscBool rsorted,csorted; 1082 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1083 PetscErrorCode ierr; 1084 1085 PetscFunctionBegin; 1086 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1087 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1088 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1089 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1090 1091 if (!rsorted) { 1092 const PetscInt *idxs; 1093 PetscInt *idxs_sorted,i; 1094 1095 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1096 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1097 for (i=0;i<rsize;i++) { 1098 idxs_perm_r[i] = i; 1099 } 1100 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1101 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1102 for (i=0;i<rsize;i++) { 1103 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1104 } 1105 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1107 } else { 1108 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1109 isrow_s = isrow; 1110 } 1111 1112 if (!csorted) { 1113 if (isrow == iscol) { 1114 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1115 iscol_s = isrow_s; 1116 } else { 1117 const PetscInt *idxs; 1118 PetscInt *idxs_sorted,i; 1119 1120 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1121 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1122 for (i=0;i<csize;i++) { 1123 idxs_perm_c[i] = i; 1124 } 1125 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1126 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1127 for (i=0;i<csize;i++) { 1128 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1129 } 1130 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1132 } 1133 } else { 1134 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1135 iscol_s = iscol; 1136 } 1137 1138 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1139 1140 if (!rsorted || !csorted) { 1141 Mat new_mat; 1142 IS is_perm_r,is_perm_c; 1143 1144 if (!rsorted) { 1145 PetscInt *idxs_r,i; 1146 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_r[idxs_perm_r[i]] = i; 1149 } 1150 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1152 } else { 1153 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1154 } 1155 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1156 1157 if (!csorted) { 1158 if (isrow_s == iscol_s) { 1159 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1160 is_perm_c = is_perm_r; 1161 } else { 1162 PetscInt *idxs_c,i; 1163 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1164 for (i=0;i<csize;i++) { 1165 idxs_c[idxs_perm_c[i]] = i; 1166 } 1167 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1168 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1169 } 1170 } else { 1171 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1172 } 1173 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1174 1175 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1176 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1177 work_mat[0] = new_mat; 1178 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1179 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1180 } 1181 1182 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1183 *B = work_mat[0]; 1184 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1185 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1186 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1192 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1193 { 1194 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1196 Mat new_mat; 1197 IS is_local,is_global; 1198 PetscInt local_size; 1199 PetscBool isseqaij; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1204 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1205 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1206 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1208 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1209 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1210 1211 /* check */ 1212 if (pcbddc->dbg_flag) { 1213 Vec x,x_change; 1214 PetscReal error; 1215 1216 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1217 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1218 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1219 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1221 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1222 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1223 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1225 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1227 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1228 ierr = VecDestroy(&x);CHKERRQ(ierr); 1229 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1230 } 1231 1232 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1233 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1234 if (isseqaij) { 1235 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1236 } else { 1237 Mat work_mat; 1238 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1239 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1240 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1241 } 1242 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1243 #if !defined(PETSC_USE_COMPLEX) 1244 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1245 #endif 1246 /* 1247 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1248 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1249 */ 1250 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1256 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1257 { 1258 PC_IS* pcis = (PC_IS*)(pc->data); 1259 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1260 IS is_aux1,is_aux2; 1261 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1262 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1263 PetscInt vbs,bs; 1264 PetscBT bitmask; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 /* 1269 No need to setup local scatters if 1270 - primal space is unchanged 1271 AND 1272 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1273 AND 1274 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1275 */ 1276 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1277 PetscFunctionReturn(0); 1278 } 1279 /* destroy old objects */ 1280 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1281 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1282 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1283 /* Set Non-overlapping dimensions */ 1284 n_B = pcis->n_B; 1285 n_D = pcis->n - n_B; 1286 n_vertices = pcbddc->n_vertices; 1287 /* create auxiliary bitmask */ 1288 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1289 for (i=0;i<n_vertices;i++) { 1290 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1291 } 1292 1293 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1294 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1295 for (i=0, n_R=0; i<pcis->n; i++) { 1296 if (!PetscBTLookup(bitmask,i)) { 1297 idx_R_local[n_R] = i; 1298 n_R++; 1299 } 1300 } 1301 1302 /* Block code */ 1303 vbs = 1; 1304 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1305 if (bs>1 && !(n_vertices%bs)) { 1306 PetscBool is_blocked = PETSC_TRUE; 1307 PetscInt *vary; 1308 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1309 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1310 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1311 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1312 for (i=0; i<n_vertices/bs; i++) { 1313 if (vary[i]!=0 && vary[i]!=bs) { 1314 is_blocked = PETSC_FALSE; 1315 break; 1316 } 1317 } 1318 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1319 vbs = bs; 1320 for (i=0;i<n_R/vbs;i++) { 1321 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1322 } 1323 } 1324 ierr = PetscFree(vary);CHKERRQ(ierr); 1325 } 1326 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1327 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1328 1329 /* print some info if requested */ 1330 if (pcbddc->dbg_flag) { 1331 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1337 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1338 } 1339 1340 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1341 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1342 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1344 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1345 for (i=0; i<n_D; i++) { 1346 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1347 } 1348 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1349 for (i=0, j=0; i<n_R; i++) { 1350 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1351 aux_array1[j++] = i; 1352 } 1353 } 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1355 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1356 for (i=0, j=0; i<n_B; i++) { 1357 if (!PetscBTLookup(bitmask,is_indices[i])) { 1358 aux_array2[j++] = i; 1359 } 1360 } 1361 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1362 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1363 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1364 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1365 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1366 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1369 for (i=0, j=0; i<n_R; i++) { 1370 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1371 aux_array1[j++] = i; 1372 } 1373 } 1374 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1375 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1376 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1377 } 1378 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1379 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1386 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1387 { 1388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1389 PC_IS *pcis = (PC_IS*)pc->data; 1390 PC pc_temp; 1391 Mat A_RR; 1392 MatReuse reuse; 1393 PetscScalar m_one = -1.0; 1394 PetscReal value; 1395 PetscInt n_D,n_R,ibs,mbs; 1396 PetscBool use_exact,use_exact_reduced,issbaij; 1397 PetscErrorCode ierr; 1398 /* prefixes stuff */ 1399 char dir_prefix[256],neu_prefix[256],str_level[16]; 1400 size_t len; 1401 1402 PetscFunctionBegin; 1403 1404 /* compute prefixes */ 1405 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1406 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1407 if (!pcbddc->current_level) { 1408 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1409 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1410 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1411 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1414 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1415 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1416 len -= 15; /* remove "pc_bddc_coarse_" */ 1417 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1418 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1419 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1420 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1421 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1422 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1423 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1424 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1425 } 1426 1427 /* DIRICHLET PROBLEM */ 1428 if (dirichlet) { 1429 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1430 if (pcbddc->issym) { 1431 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1432 } 1433 /* Matrix for Dirichlet problem is pcis->A_II */ 1434 n_D = pcis->n - pcis->n_B; 1435 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1436 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1437 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1438 /* default */ 1439 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1440 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1442 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1443 if (issbaij) { 1444 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1445 } else { 1446 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1447 } 1448 /* Allow user's customization */ 1449 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1450 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1451 } 1452 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1453 if (sub_schurs->interior_solver) { 1454 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1455 } 1456 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1457 if (!n_D) { 1458 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1459 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1460 } 1461 /* Set Up KSP for Dirichlet problem of BDDC */ 1462 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1463 /* set ksp_D into pcis data */ 1464 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1465 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1466 pcis->ksp_D = pcbddc->ksp_D; 1467 } 1468 1469 /* NEUMANN PROBLEM */ 1470 A_RR = 0; 1471 if (neumann) { 1472 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1473 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1474 if (pcbddc->ksp_R) { /* already created ksp */ 1475 PetscInt nn_R; 1476 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1477 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1478 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1479 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1480 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1481 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1482 reuse = MAT_INITIAL_MATRIX; 1483 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1484 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1485 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1486 reuse = MAT_INITIAL_MATRIX; 1487 } else { /* safe to reuse the matrix */ 1488 reuse = MAT_REUSE_MATRIX; 1489 } 1490 } 1491 /* last check */ 1492 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1493 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1494 reuse = MAT_INITIAL_MATRIX; 1495 } 1496 } else { /* first time, so we need to create the matrix */ 1497 reuse = MAT_INITIAL_MATRIX; 1498 } 1499 /* extract A_RR */ 1500 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1501 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1502 if (ibs != mbs) { 1503 Mat newmat; 1504 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1505 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1506 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1507 } else { 1508 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1509 } 1510 if (pcbddc->issym) { 1511 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1512 } 1513 if (!pcbddc->ksp_R) { /* create object if not present */ 1514 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1515 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1516 /* default */ 1517 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1518 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1519 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1521 if (issbaij) { 1522 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1523 } else { 1524 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1525 } 1526 /* Allow user's customization */ 1527 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1528 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1529 } 1530 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1531 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1532 if (!n_R) { 1533 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1534 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1535 } 1536 /* Set Up KSP for Neumann problem of BDDC */ 1537 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1538 } 1539 1540 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1541 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1542 if (pcbddc->dbg_flag) { 1543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1544 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1546 } 1547 if (dirichlet) { /* Dirichlet */ 1548 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1549 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1550 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1551 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1552 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1553 /* need to be adapted? */ 1554 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1555 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1556 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1557 /* print info */ 1558 if (pcbddc->dbg_flag) { 1559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1560 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1561 } 1562 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1563 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1564 } 1565 } 1566 if (neumann) { /* Neumann */ 1567 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1568 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1569 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1570 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1571 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1572 /* need to be adapted? */ 1573 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1574 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1575 /* print info */ 1576 if (pcbddc->dbg_flag) { 1577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1579 } 1580 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1581 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1582 } 1583 } 1584 } 1585 /* free Neumann problem's matrix */ 1586 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1592 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1593 { 1594 PetscErrorCode ierr; 1595 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1596 1597 PetscFunctionBegin; 1598 if (applytranspose) { 1599 if (pcbddc->local_auxmat1) { 1600 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1601 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1602 } 1603 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1604 } else { 1605 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1606 if (pcbddc->local_auxmat1) { 1607 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1608 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1609 } 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1617 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1618 { 1619 PetscErrorCode ierr; 1620 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1621 PC_IS* pcis = (PC_IS*) (pc->data); 1622 const PetscScalar zero = 0.0; 1623 1624 PetscFunctionBegin; 1625 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1626 if (applytranspose) { 1627 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1628 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1629 } else { 1630 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1631 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1632 } 1633 /* start communications from local primal nodes to rhs of coarse solver */ 1634 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1635 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 1638 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1639 /* TODO remove null space when doing multilevel */ 1640 if (pcbddc->coarse_ksp) { 1641 Vec rhs,sol; 1642 1643 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1644 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1645 if (applytranspose) { 1646 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1647 } else { 1648 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1649 } 1650 } 1651 1652 /* Local solution on R nodes */ 1653 if (pcis->n) { 1654 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 if (pcbddc->switch_static) { 1658 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1660 } 1661 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1662 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1663 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 if (pcbddc->switch_static) { 1666 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 } 1669 } 1670 1671 /* communications from coarse sol to local primal nodes */ 1672 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1673 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1674 1675 /* Sum contributions from two levels */ 1676 if (applytranspose) { 1677 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1678 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1679 } else { 1680 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1681 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1688 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1689 { 1690 PetscErrorCode ierr; 1691 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1692 PetscScalar *array; 1693 Vec from,to; 1694 1695 PetscFunctionBegin; 1696 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1697 from = pcbddc->coarse_vec; 1698 to = pcbddc->vec1_P; 1699 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1700 Vec tvec; 1701 1702 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1703 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1705 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1706 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1707 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1708 } 1709 } else { /* from local to global -> put data in coarse right hand side */ 1710 from = pcbddc->vec1_P; 1711 to = pcbddc->coarse_vec; 1712 } 1713 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1719 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1720 { 1721 PetscErrorCode ierr; 1722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1723 PetscScalar *array; 1724 Vec from,to; 1725 1726 PetscFunctionBegin; 1727 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1728 from = pcbddc->coarse_vec; 1729 to = pcbddc->vec1_P; 1730 } else { /* from local to global -> put data in coarse right hand side */ 1731 from = pcbddc->vec1_P; 1732 to = pcbddc->coarse_vec; 1733 } 1734 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1735 if (smode == SCATTER_FORWARD) { 1736 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1737 Vec tvec; 1738 1739 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1740 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1741 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1743 } 1744 } else { 1745 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1746 ierr = VecResetArray(from);CHKERRQ(ierr); 1747 } 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 /* uncomment for testing purposes */ 1753 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1756 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1757 { 1758 PetscErrorCode ierr; 1759 PC_IS* pcis = (PC_IS*)(pc->data); 1760 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1761 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1762 /* one and zero */ 1763 PetscScalar one=1.0,zero=0.0; 1764 /* space to store constraints and their local indices */ 1765 PetscScalar *constraints_data; 1766 PetscInt *constraints_idxs,*constraints_idxs_B; 1767 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1768 PetscInt *constraints_n; 1769 /* iterators */ 1770 PetscInt i,j,k,total_counts,total_counts_cc; 1771 /* BLAS integers */ 1772 PetscBLASInt lwork,lierr; 1773 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1774 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1775 /* reuse */ 1776 PetscInt olocal_primal_size; 1777 PetscInt *oprimal_indices_local_idxs; 1778 /* change of basis */ 1779 PetscBool boolforchange,qr_needed; 1780 PetscBT change_basis,qr_needed_idx; 1781 /* auxiliary stuff */ 1782 PetscInt *nnz,*is_indices; 1783 PetscInt ncc; 1784 /* some quantities */ 1785 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1786 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1787 1788 PetscFunctionBegin; 1789 /* Destroy Mat objects computed previously */ 1790 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1791 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1792 /* save info on constraints from previous setup (if any) */ 1793 olocal_primal_size = pcbddc->local_primal_size; 1794 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 1795 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 1796 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1797 1798 /* print some info */ 1799 if (pcbddc->dbg_flag) { 1800 IS vertices; 1801 PetscInt nv,nedges,nfaces; 1802 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1803 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1804 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1805 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1810 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1811 } 1812 1813 if (!pcbddc->adaptive_selection) { 1814 IS ISForVertices,*ISForFaces,*ISForEdges; 1815 MatNullSpace nearnullsp; 1816 const Vec *nearnullvecs; 1817 Vec *localnearnullsp; 1818 PetscScalar *array; 1819 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1820 PetscBool nnsp_has_cnst; 1821 /* LAPACK working arrays for SVD or POD */ 1822 PetscBool skip_lapack; 1823 PetscScalar *work; 1824 PetscReal *singular_vals; 1825 #if defined(PETSC_USE_COMPLEX) 1826 PetscReal *rwork; 1827 #endif 1828 #if defined(PETSC_MISSING_LAPACK_GESVD) 1829 PetscScalar *temp_basis,*correlation_mat; 1830 #else 1831 PetscBLASInt dummy_int=1; 1832 PetscScalar dummy_scalar=1.; 1833 #endif 1834 1835 /* Get index sets for faces, edges and vertices from graph */ 1836 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1837 /* free unneeded index sets */ 1838 if (!pcbddc->use_vertices) { 1839 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1840 } 1841 if (!pcbddc->use_edges) { 1842 for (i=0;i<n_ISForEdges;i++) { 1843 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1844 } 1845 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1846 n_ISForEdges = 0; 1847 } 1848 if (!pcbddc->use_faces) { 1849 for (i=0;i<n_ISForFaces;i++) { 1850 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1851 } 1852 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1853 n_ISForFaces = 0; 1854 } 1855 1856 #if defined(PETSC_USE_DEBUG) 1857 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1858 Also use_change_of_basis should be consistent among processors */ 1859 if (pcbddc->NullSpace) { 1860 PetscBool tbool[2],gbool[2]; 1861 1862 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1863 pcbddc->use_change_of_basis = PETSC_TRUE; 1864 if (!ISForEdges) { 1865 pcbddc->use_change_on_faces = PETSC_TRUE; 1866 } 1867 } 1868 tbool[0] = pcbddc->use_change_of_basis; 1869 tbool[1] = pcbddc->use_change_on_faces; 1870 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1871 pcbddc->use_change_of_basis = gbool[0]; 1872 pcbddc->use_change_on_faces = gbool[1]; 1873 } 1874 #endif 1875 1876 /* check if near null space is attached to global mat */ 1877 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1878 if (nearnullsp) { 1879 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1880 /* remove any stored info */ 1881 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1882 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1883 /* store information for BDDC solver reuse */ 1884 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1885 pcbddc->onearnullspace = nearnullsp; 1886 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1887 for (i=0;i<nnsp_size;i++) { 1888 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1889 } 1890 } else { /* if near null space is not provided BDDC uses constants by default */ 1891 nnsp_size = 0; 1892 nnsp_has_cnst = PETSC_TRUE; 1893 } 1894 /* get max number of constraints on a single cc */ 1895 max_constraints = nnsp_size; 1896 if (nnsp_has_cnst) max_constraints++; 1897 1898 /* 1899 Evaluate maximum storage size needed by the procedure 1900 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 1901 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 1902 There can be multiple constraints per connected component 1903 */ 1904 n_vertices = 0; 1905 if (ISForVertices) { 1906 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1907 } 1908 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 1909 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 1910 1911 total_counts = n_ISForFaces+n_ISForEdges; 1912 total_counts *= max_constraints; 1913 total_counts += n_vertices; 1914 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1915 1916 total_counts = 0; 1917 max_size_of_constraint = 0; 1918 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1919 IS used_is; 1920 if (i<n_ISForEdges) { 1921 used_is = ISForEdges[i]; 1922 } else { 1923 used_is = ISForFaces[i-n_ISForEdges]; 1924 } 1925 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 1926 total_counts += j; 1927 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1928 } 1929 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 1930 1931 /* get local part of global near null space vectors */ 1932 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1933 for (k=0;k<nnsp_size;k++) { 1934 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1935 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1936 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1937 } 1938 1939 /* whether or not to skip lapack calls */ 1940 skip_lapack = PETSC_TRUE; 1941 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1942 1943 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1944 if (!skip_lapack) { 1945 PetscScalar temp_work; 1946 1947 #if defined(PETSC_MISSING_LAPACK_GESVD) 1948 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1949 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1950 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1951 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1952 #if defined(PETSC_USE_COMPLEX) 1953 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1954 #endif 1955 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1956 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1958 lwork = -1; 1959 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1960 #if !defined(PETSC_USE_COMPLEX) 1961 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1962 #else 1963 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1964 #endif 1965 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1967 #else /* on missing GESVD */ 1968 /* SVD */ 1969 PetscInt max_n,min_n; 1970 max_n = max_size_of_constraint; 1971 min_n = max_constraints; 1972 if (max_size_of_constraint < max_constraints) { 1973 min_n = max_size_of_constraint; 1974 max_n = max_constraints; 1975 } 1976 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1977 #if defined(PETSC_USE_COMPLEX) 1978 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1979 #endif 1980 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1981 lwork = -1; 1982 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1986 #if !defined(PETSC_USE_COMPLEX) 1987 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1988 #else 1989 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1990 #endif 1991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1993 #endif /* on missing GESVD */ 1994 /* Allocate optimal workspace */ 1995 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1996 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1997 } 1998 /* Now we can loop on constraining sets */ 1999 total_counts = 0; 2000 constraints_idxs_ptr[0] = 0; 2001 constraints_data_ptr[0] = 0; 2002 /* vertices */ 2003 if (n_vertices) { 2004 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2005 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2006 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2007 for (i=0;i<n_vertices;i++) { 2008 constraints_n[total_counts] = 1; 2009 constraints_data[total_counts] = 1.0; 2010 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2011 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2012 total_counts++; 2013 } 2014 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2015 PetscBool used_vertex; 2016 for (i=0;i<n_vertices;i++) { 2017 used_vertex = PETSC_FALSE; 2018 k = 0; 2019 while (!used_vertex && k<nnsp_size) { 2020 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2021 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2022 constraints_n[total_counts] = 1; 2023 constraints_idxs[total_counts] = is_indices[i]; 2024 constraints_data[total_counts] = 1.0; 2025 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2026 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2027 total_counts++; 2028 used_vertex = PETSC_TRUE; 2029 } 2030 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2031 k++; 2032 } 2033 } 2034 } 2035 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2036 n_vertices = total_counts; 2037 } 2038 2039 /* edges and faces */ 2040 total_counts_cc = total_counts; 2041 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2042 IS used_is; 2043 PetscBool idxs_copied = PETSC_FALSE; 2044 2045 if (ncc<n_ISForEdges) { 2046 used_is = ISForEdges[ncc]; 2047 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2048 } else { 2049 used_is = ISForFaces[ncc-n_ISForEdges]; 2050 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2051 } 2052 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2053 2054 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2055 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2056 /* change of basis should not be performed on local periodic nodes */ 2057 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2058 if (nnsp_has_cnst) { 2059 PetscScalar quad_value; 2060 2061 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2062 idxs_copied = PETSC_TRUE; 2063 2064 if (!pcbddc->use_nnsp_true) { 2065 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2066 } else { 2067 quad_value = 1.0; 2068 } 2069 for (j=0;j<size_of_constraint;j++) { 2070 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2071 } 2072 temp_constraints++; 2073 total_counts++; 2074 } 2075 for (k=0;k<nnsp_size;k++) { 2076 PetscReal real_value; 2077 PetscScalar *ptr_to_data; 2078 2079 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2080 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2081 for (j=0;j<size_of_constraint;j++) { 2082 ptr_to_data[j] = array[is_indices[j]]; 2083 } 2084 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2085 /* check if array is null on the connected component */ 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2087 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2088 if (real_value > 0.0) { /* keep indices and values */ 2089 temp_constraints++; 2090 total_counts++; 2091 if (!idxs_copied) { 2092 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2093 idxs_copied = PETSC_TRUE; 2094 } 2095 } 2096 } 2097 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2098 valid_constraints = temp_constraints; 2099 if (!pcbddc->use_nnsp_true && temp_constraints) { 2100 if (temp_constraints == 1) { /* just normalize the constraint */ 2101 PetscScalar norm,*ptr_to_data; 2102 2103 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2104 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2105 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2106 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2107 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2108 } else { /* perform SVD */ 2109 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2110 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2111 2112 #if defined(PETSC_MISSING_LAPACK_GESVD) 2113 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2114 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2115 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2116 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2117 from that computed using LAPACKgesvd 2118 -> This is due to a different computation of eigenvectors in LAPACKheev 2119 -> The quality of the POD-computed basis will be the same */ 2120 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2121 /* Store upper triangular part of correlation matrix */ 2122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2123 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2124 for (j=0;j<temp_constraints;j++) { 2125 for (k=0;k<j+1;k++) { 2126 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2127 } 2128 } 2129 /* compute eigenvalues and eigenvectors of correlation matrix */ 2130 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2131 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2132 #if !defined(PETSC_USE_COMPLEX) 2133 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2134 #else 2135 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2136 #endif 2137 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2138 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2139 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2140 j = 0; 2141 while (j < temp_constraints && singular_vals[j] < tol) j++; 2142 total_counts = total_counts-j; 2143 valid_constraints = temp_constraints-j; 2144 /* scale and copy POD basis into used quadrature memory */ 2145 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2151 if (j<temp_constraints) { 2152 PetscInt ii; 2153 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2154 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2155 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2156 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2157 for (k=0;k<temp_constraints-j;k++) { 2158 for (ii=0;ii<size_of_constraint;ii++) { 2159 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2160 } 2161 } 2162 } 2163 #else /* on missing GESVD */ 2164 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 #if !defined(PETSC_USE_COMPLEX) 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2170 #else 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2172 #endif 2173 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2174 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2175 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2176 k = temp_constraints; 2177 if (k > size_of_constraint) k = size_of_constraint; 2178 j = 0; 2179 while (j < k && singular_vals[k-j-1] < tol) j++; 2180 valid_constraints = k-j; 2181 total_counts = total_counts-temp_constraints+valid_constraints; 2182 #endif /* on missing GESVD */ 2183 } 2184 } 2185 /* update pointers information */ 2186 if (valid_constraints) { 2187 constraints_n[total_counts_cc] = valid_constraints; 2188 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2189 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2190 total_counts_cc++; 2191 /* set change_of_basis flag */ 2192 if (boolforchange) { 2193 for (j=valid_constraints;j>0;j--) { 2194 PetscBTSet(change_basis,total_counts-j); 2195 } 2196 } 2197 } 2198 } 2199 /* free workspace */ 2200 if (!skip_lapack) { 2201 ierr = PetscFree(work);CHKERRQ(ierr); 2202 #if defined(PETSC_USE_COMPLEX) 2203 ierr = PetscFree(rwork);CHKERRQ(ierr); 2204 #endif 2205 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2206 #if defined(PETSC_MISSING_LAPACK_GESVD) 2207 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2208 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2209 #endif 2210 } 2211 for (k=0;k<nnsp_size;k++) { 2212 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2213 } 2214 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2215 /* free index sets of faces, edges and vertices */ 2216 for (i=0;i<n_ISForFaces;i++) { 2217 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2218 } 2219 if (n_ISForFaces) { 2220 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2221 } 2222 for (i=0;i<n_ISForEdges;i++) { 2223 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2224 } 2225 if (n_ISForEdges) { 2226 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2227 } 2228 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2229 } else { 2230 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2231 2232 total_counts = 0; 2233 n_vertices = 0; 2234 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2235 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2236 } 2237 max_constraints = 0; 2238 total_counts_cc = 0; 2239 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2240 total_counts += pcbddc->adaptive_constraints_n[i]; 2241 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2242 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2243 } 2244 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2245 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2246 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2247 constraints_data = pcbddc->adaptive_constraints_data; 2248 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2249 total_counts_cc = 0; 2250 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2251 if (pcbddc->adaptive_constraints_n[i]) { 2252 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2253 } 2254 } 2255 #if 0 2256 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2257 for (i=0;i<total_counts_cc;i++) { 2258 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2259 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2260 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2261 printf(" %d",constraints_idxs[j]); 2262 } 2263 printf("\n"); 2264 printf("number of cc: %d\n",constraints_n[i]); 2265 } 2266 for (i=0;i<n_vertices;i++) { 2267 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2268 } 2269 for (i=0;i<sub_schurs->n_subs;i++) { 2270 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2271 } 2272 #endif 2273 2274 max_size_of_constraint = 0; 2275 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2276 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2277 /* Change of basis */ 2278 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2279 if (pcbddc->use_change_of_basis) { 2280 PetscInt cum = n_vertices; 2281 for (i=0;i<sub_schurs->n_subs;i++) { 2282 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2283 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2284 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2285 } 2286 } 2287 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2288 } 2289 } 2290 } 2291 pcbddc->local_primal_size = total_counts; 2292 2293 /* map constraints_idxs in boundary numbering */ 2294 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2295 if (i != constraints_idxs_ptr[total_counts_cc]) { 2296 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2297 } 2298 2299 /* Create constraint matrix */ 2300 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2301 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2302 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2303 /* array to compute a local numbering of constraints : vertices first then constraints */ 2304 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2305 2306 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2307 total_primal_vertices=0; 2308 for (i=0;i<total_counts_cc;i++) { 2309 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2310 if (size_of_constraint == 1) { 2311 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2312 } else if (PetscBTLookup(change_basis,i)) { 2313 for (k=0;k<constraints_n[i];k++) { 2314 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2315 } 2316 } 2317 } 2318 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2319 pcbddc->n_vertices = total_primal_vertices; 2320 /* permute indices in order to have a sorted set of vertices */ 2321 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2322 2323 /* determine if a QR strategy is needed for change of basis */ 2324 qr_needed = PETSC_FALSE; 2325 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2326 for (i=n_vertices;i<total_counts_cc;i++) { 2327 if (PetscBTLookup(change_basis,i)) { 2328 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2329 PetscBTSet(qr_needed_idx,i); 2330 qr_needed = PETSC_TRUE; 2331 } 2332 } 2333 } 2334 2335 /* get reference dof for local constraints */ 2336 total_counts = total_primal_vertices; 2337 for (i=n_vertices;i<total_counts_cc;i++) { 2338 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2339 if (!PetscBTLookup(change_basis,i)) { 2340 for (k=0;k<constraints_n[i];k++) { 2341 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2342 } 2343 } 2344 } 2345 2346 /* nonzero structure of constraint matrix */ 2347 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2348 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2349 j = total_primal_vertices; 2350 for (i=n_vertices;i<total_counts_cc;i++) { 2351 if (!PetscBTLookup(change_basis,i)) { 2352 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2353 for (k=0;k<constraints_n[i];k++) nnz[j+k] = size_of_constraint; 2354 j += constraints_n[i]; 2355 } 2356 } 2357 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2358 ierr = PetscFree(nnz);CHKERRQ(ierr); 2359 2360 /* set values in constraint matrix */ 2361 for (i=0;i<total_primal_vertices;i++) { 2362 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2363 } 2364 total_counts = total_primal_vertices; 2365 for (i=n_vertices;i<total_counts_cc;i++) { 2366 if (!PetscBTLookup(change_basis,i)) { 2367 PetscInt *cols; 2368 2369 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2370 cols = constraints_idxs+constraints_idxs_ptr[i]; 2371 for (k=0;k<constraints_n[i];k++) { 2372 PetscInt row = total_counts+k; 2373 PetscScalar *vals; 2374 2375 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2376 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2377 } 2378 total_counts += constraints_n[i]; 2379 } 2380 } 2381 /* assembling */ 2382 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 2385 /* 2386 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2387 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2388 */ 2389 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2390 if (pcbddc->use_change_of_basis) { 2391 /* dual and primal dofs on a single cc */ 2392 PetscInt dual_dofs,primal_dofs; 2393 /* working stuff for GEQRF */ 2394 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2395 PetscBLASInt lqr_work; 2396 /* working stuff for UNGQR */ 2397 PetscScalar *gqr_work,lgqr_work_t; 2398 PetscBLASInt lgqr_work; 2399 /* working stuff for TRTRS */ 2400 PetscScalar *trs_rhs; 2401 PetscBLASInt Blas_NRHS; 2402 /* pointers for values insertion into change of basis matrix */ 2403 PetscInt *start_rows,*start_cols; 2404 PetscScalar *start_vals; 2405 /* working stuff for values insertion */ 2406 PetscBT is_primal; 2407 PetscInt *aux_primal_numbering_B; 2408 /* matrix sizes */ 2409 PetscInt global_size,local_size; 2410 /* temporary change of basis */ 2411 Mat localChangeOfBasisMatrix; 2412 /* extra space for debugging */ 2413 PetscScalar *dbg_work; 2414 2415 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2416 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2417 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2418 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2419 /* nonzeros for local mat */ 2420 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2421 for (i=0;i<pcis->n;i++) nnz[i]=1; 2422 for (i=n_vertices;i<total_counts_cc;i++) { 2423 if (PetscBTLookup(change_basis,i)) { 2424 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2425 if (PetscBTLookup(qr_needed_idx,i)) { 2426 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2427 } else { 2428 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2429 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2430 } 2431 } 2432 } 2433 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2434 ierr = PetscFree(nnz);CHKERRQ(ierr); 2435 /* Set initial identity in the matrix */ 2436 for (i=0;i<pcis->n;i++) { 2437 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2438 } 2439 2440 if (pcbddc->dbg_flag) { 2441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2443 } 2444 2445 2446 /* Now we loop on the constraints which need a change of basis */ 2447 /* 2448 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2449 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2450 2451 Basic blocks of change of basis matrix T computed by 2452 2453 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2454 2455 | 1 0 ... 0 s_1/S | 2456 | 0 1 ... 0 s_2/S | 2457 | ... | 2458 | 0 ... 1 s_{n-1}/S | 2459 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2460 2461 with S = \sum_{i=1}^n s_i^2 2462 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2463 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2464 2465 - QR decomposition of constraints otherwise 2466 */ 2467 if (qr_needed) { 2468 /* space to store Q */ 2469 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2470 /* first we issue queries for optimal work */ 2471 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2472 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2473 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2474 lqr_work = -1; 2475 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2477 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2478 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2479 lgqr_work = -1; 2480 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2481 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2484 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2485 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2486 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2487 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2488 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2489 /* array to store scaling factors for reflectors */ 2490 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2491 /* array to store rhs and solution of triangular solver */ 2492 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2493 /* allocating workspace for check */ 2494 if (pcbddc->dbg_flag) { 2495 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2496 } 2497 } 2498 /* array to store whether a node is primal or not */ 2499 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2501 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->primal_indices_local_idxs,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2502 if (i != total_primal_vertices) { 2503 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2504 } 2505 for (i=0;i<total_primal_vertices;i++) { 2506 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2507 } 2508 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2509 2510 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2511 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2512 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2513 if (PetscBTLookup(change_basis,total_counts)) { 2514 /* get constraint info */ 2515 primal_dofs = constraints_n[total_counts]; 2516 dual_dofs = size_of_constraint-primal_dofs; 2517 2518 if (pcbddc->dbg_flag) { 2519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2520 } 2521 2522 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2523 2524 /* copy quadrature constraints for change of basis check */ 2525 if (pcbddc->dbg_flag) { 2526 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2527 } 2528 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2529 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2530 2531 /* compute QR decomposition of constraints */ 2532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2535 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2536 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2537 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2539 2540 /* explictly compute R^-T */ 2541 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2542 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2543 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2544 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2546 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2547 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2548 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2549 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2551 2552 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2557 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2558 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2559 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2560 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2561 2562 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2563 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2564 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2571 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2572 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2573 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2574 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2575 2576 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2577 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2578 /* insert cols for primal dofs */ 2579 for (j=0;j<primal_dofs;j++) { 2580 start_vals = &qr_basis[j*size_of_constraint]; 2581 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2582 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2583 } 2584 /* insert cols for dual dofs */ 2585 for (j=0,k=0;j<dual_dofs;k++) { 2586 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2587 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2588 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2589 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2590 j++; 2591 } 2592 } 2593 2594 /* check change of basis */ 2595 if (pcbddc->dbg_flag) { 2596 PetscInt ii,jj; 2597 PetscBool valid_qr=PETSC_TRUE; 2598 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2602 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2604 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2605 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 for (jj=0;jj<size_of_constraint;jj++) { 2608 for (ii=0;ii<primal_dofs;ii++) { 2609 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2610 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2611 } 2612 } 2613 if (!valid_qr) { 2614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2615 for (jj=0;jj<size_of_constraint;jj++) { 2616 for (ii=0;ii<primal_dofs;ii++) { 2617 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2618 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2619 } 2620 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2621 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2622 } 2623 } 2624 } 2625 } else { 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2627 } 2628 } 2629 } else { /* simple transformation block */ 2630 PetscInt row,col; 2631 PetscScalar val,norm; 2632 2633 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2634 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2635 for (j=0;j<size_of_constraint;j++) { 2636 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2637 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2638 if (!PetscBTLookup(is_primal,row_B)) { 2639 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2640 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2641 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2642 } else { 2643 for (k=0;k<size_of_constraint;k++) { 2644 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2645 if (row != col) { 2646 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2647 } else { 2648 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2649 } 2650 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2651 } 2652 } 2653 } 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2656 } 2657 } 2658 } else { 2659 if (pcbddc->dbg_flag) { 2660 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2661 } 2662 } 2663 } 2664 2665 /* free workspace */ 2666 if (qr_needed) { 2667 if (pcbddc->dbg_flag) { 2668 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2669 } 2670 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2671 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2672 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2673 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2674 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2675 } 2676 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2677 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2678 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 2680 /* assembling of global change of variable */ 2681 { 2682 Mat tmat; 2683 PetscInt bs; 2684 2685 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2686 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2687 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2688 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2689 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2690 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2691 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2692 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2693 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2694 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2695 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2696 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2697 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2698 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2699 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2700 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2701 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2702 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2703 } 2704 /* check */ 2705 if (pcbddc->dbg_flag) { 2706 PetscReal error; 2707 Vec x,x_change; 2708 2709 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2710 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2711 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2712 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2713 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2718 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2719 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2720 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2722 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2723 ierr = VecDestroy(&x);CHKERRQ(ierr); 2724 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2725 } 2726 2727 /* adapt sub_schurs computed (if any) */ 2728 if (pcbddc->use_deluxe_scaling) { 2729 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2730 if (sub_schurs->S_Ej_all) { 2731 Mat S_new,tmat; 2732 IS is_all_N; 2733 2734 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2735 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2737 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2738 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2739 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2740 sub_schurs->S_Ej_all = S_new; 2741 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2742 if (sub_schurs->sum_S_Ej_all) { 2743 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2744 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2745 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2746 sub_schurs->sum_S_Ej_all = S_new; 2747 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2748 } 2749 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2750 } 2751 } 2752 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2753 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2754 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2755 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2756 } 2757 2758 /* set up change of basis context */ 2759 if (pcbddc->ChangeOfBasisMatrix) { 2760 PCBDDCChange_ctx change_ctx; 2761 2762 if (!pcbddc->new_global_mat) { 2763 PetscInt global_size,local_size; 2764 2765 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2766 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2767 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2768 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2769 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2770 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2771 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2772 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2773 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2774 } else { 2775 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2776 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2777 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2778 } 2779 if (!pcbddc->user_ChangeOfBasisMatrix) { 2780 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2781 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2782 } else { 2783 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2784 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2785 } 2786 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2787 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2788 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 } 2791 2792 /* check if a new primal space has been introduced */ 2793 pcbddc->new_primal_space_local = PETSC_TRUE; 2794 if (olocal_primal_size == pcbddc->local_primal_size) { 2795 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2796 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2797 } 2798 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2799 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2800 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2801 2802 /* flush dbg viewer */ 2803 if (pcbddc->dbg_flag) { 2804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2805 } 2806 2807 /* free workspace */ 2808 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2809 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2810 if (!pcbddc->adaptive_selection) { 2811 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2812 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2813 } else { 2814 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2815 pcbddc->adaptive_constraints_idxs_ptr, 2816 pcbddc->adaptive_constraints_data_ptr, 2817 pcbddc->adaptive_constraints_idxs, 2818 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2819 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2820 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2821 } 2822 PetscFunctionReturn(0); 2823 } 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2827 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2828 { 2829 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2830 PC_IS *pcis = (PC_IS*)pc->data; 2831 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2832 PetscInt ierr,i,vertex_size,N; 2833 PetscViewer viewer=pcbddc->dbg_viewer; 2834 2835 PetscFunctionBegin; 2836 /* Reset previously computed graph */ 2837 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2838 /* Init local Graph struct */ 2839 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2840 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2841 2842 /* Check validity of the csr graph passed in by the user */ 2843 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2844 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2845 } 2846 2847 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2848 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2849 PetscInt *xadj,*adjncy; 2850 PetscInt nvtxs; 2851 PetscBool flg_row=PETSC_FALSE; 2852 2853 if (pcbddc->use_local_adj) { 2854 2855 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2856 if (flg_row) { 2857 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2858 pcbddc->computed_rowadj = PETSC_TRUE; 2859 } 2860 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2861 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2862 IS is_dummy; 2863 ISLocalToGlobalMapping l2gmap_dummy; 2864 PetscInt j,sum; 2865 PetscInt *cxadj,*cadjncy; 2866 const PetscInt *idxs; 2867 PCBDDCGraph graph; 2868 PetscBT is_on_boundary; 2869 2870 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2871 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2872 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2873 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2874 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 2875 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2876 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2877 if (flg_row) { 2878 graph->xadj = xadj; 2879 graph->adjncy = adjncy; 2880 } 2881 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2882 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2883 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2884 2885 if (pcbddc->dbg_flag) { 2886 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 2887 for (i=0;i<graph->ncc;i++) { 2888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2889 } 2890 } 2891 2892 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2893 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2894 for (i=0;i<pcis->n_B;i++) { 2895 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2896 } 2897 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2898 2899 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2900 sum = 0; 2901 for (i=0;i<graph->ncc;i++) { 2902 PetscInt sizecc = 0; 2903 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2904 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2905 sizecc++; 2906 } 2907 } 2908 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2909 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2910 cxadj[graph->queue[j]] = sizecc; 2911 } 2912 } 2913 sum += sizecc*sizecc; 2914 } 2915 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2916 sum = 0; 2917 for (i=0;i<pcis->n;i++) { 2918 PetscInt temp = cxadj[i]; 2919 cxadj[i] = sum; 2920 sum += temp; 2921 } 2922 cxadj[pcis->n] = sum; 2923 for (i=0;i<graph->ncc;i++) { 2924 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2925 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2926 PetscInt k,sizecc = 0; 2927 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2928 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2929 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2930 sizecc++; 2931 } 2932 } 2933 } 2934 } 2935 } 2936 if (sum) { 2937 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2938 } else { 2939 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2940 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2941 } 2942 graph->xadj = 0; 2943 graph->adjncy = 0; 2944 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2945 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2946 } 2947 } 2948 if (pcbddc->dbg_flag) { 2949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2950 } 2951 2952 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2953 vertex_size = 1; 2954 if (pcbddc->user_provided_isfordofs) { 2955 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2956 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2957 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2958 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2959 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2960 } 2961 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2962 pcbddc->n_ISForDofs = 0; 2963 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2964 } 2965 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2966 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2967 } else { 2968 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2969 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2970 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2971 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2972 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2973 } 2974 } 2975 } 2976 2977 /* Setup of Graph */ 2978 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2979 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2980 } 2981 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2982 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2983 } 2984 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 2985 2986 /* Graph's connected components analysis */ 2987 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2988 2989 /* print some info to stdout */ 2990 if (pcbddc->dbg_flag) { 2991 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 2992 } 2993 2994 /* mark topography has done */ 2995 pcbddc->recompute_topography = PETSC_FALSE; 2996 PetscFunctionReturn(0); 2997 } 2998 2999 #undef __FUNCT__ 3000 #define __FUNCT__ "PCBDDCSubsetNumbering" 3001 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3002 { 3003 Vec local_vec,global_vec; 3004 IS seqis,paris; 3005 VecScatter scatter_ctx; 3006 PetscScalar *array; 3007 PetscInt *temp_global_dofs; 3008 PetscScalar globalsum; 3009 PetscInt i,j,s; 3010 PetscInt nlocals,first_index,old_index,max_local,max_global; 3011 PetscMPIInt rank_prec_comm,size_prec_comm; 3012 PetscInt *dof_sizes,*dof_displs; 3013 PetscBool first_found; 3014 PetscErrorCode ierr; 3015 3016 PetscFunctionBegin; 3017 /* mpi buffers */ 3018 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3019 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3020 j = ( !rank_prec_comm ? size_prec_comm : 0); 3021 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3022 /* get maximum size of subset */ 3023 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3024 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3025 max_local = 0; 3026 for (i=0;i<n_local_dofs;i++) { 3027 if (max_local < temp_global_dofs[i] ) { 3028 max_local = temp_global_dofs[i]; 3029 } 3030 } 3031 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3032 max_global++; 3033 max_local = 0; 3034 for (i=0;i<n_local_dofs;i++) { 3035 if (max_local < local_dofs[i] ) { 3036 max_local = local_dofs[i]; 3037 } 3038 } 3039 max_local++; 3040 /* allocate workspace */ 3041 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3042 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3043 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3044 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3045 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3046 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3047 /* create scatter */ 3048 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3049 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3050 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3051 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3052 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3053 /* init array */ 3054 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3055 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3056 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3057 if (local_dofs_mult) { 3058 for (i=0;i<n_local_dofs;i++) { 3059 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3060 } 3061 } else { 3062 for (i=0;i<n_local_dofs;i++) { 3063 array[local_dofs[i]]=1.0; 3064 } 3065 } 3066 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3067 /* scatter into global vec and get total number of global dofs */ 3068 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3070 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3071 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3072 /* Fill global_vec with cumulative function for global numbering */ 3073 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3074 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3075 nlocals = 0; 3076 first_index = -1; 3077 first_found = PETSC_FALSE; 3078 for (i=0;i<s;i++) { 3079 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3080 first_found = PETSC_TRUE; 3081 first_index = i; 3082 } 3083 nlocals += (PetscInt)PetscRealPart(array[i]); 3084 } 3085 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3086 if (!rank_prec_comm) { 3087 dof_displs[0]=0; 3088 for (i=1;i<size_prec_comm;i++) { 3089 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3090 } 3091 } 3092 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3093 if (first_found) { 3094 array[first_index] += (PetscScalar)nlocals; 3095 old_index = first_index; 3096 for (i=first_index+1;i<s;i++) { 3097 if (PetscRealPart(array[i]) > 0.1) { 3098 array[i] += array[old_index]; 3099 old_index = i; 3100 } 3101 } 3102 } 3103 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3104 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3105 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3106 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3107 /* get global ordering of local dofs */ 3108 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3109 if (local_dofs_mult) { 3110 for (i=0;i<n_local_dofs;i++) { 3111 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3112 } 3113 } else { 3114 for (i=0;i<n_local_dofs;i++) { 3115 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3116 } 3117 } 3118 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3119 /* free workspace */ 3120 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3121 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3122 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3123 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3124 /* return pointer to global ordering of local dofs */ 3125 *global_numbering_subset = temp_global_dofs; 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3131 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3132 { 3133 PetscInt i,j; 3134 PetscScalar *alphas; 3135 PetscErrorCode ierr; 3136 3137 PetscFunctionBegin; 3138 /* this implements stabilized Gram-Schmidt */ 3139 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3140 for (i=0;i<n;i++) { 3141 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3142 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3143 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3144 } 3145 ierr = PetscFree(alphas);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "MatISGetSubassemblingPattern" 3151 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3152 { 3153 Mat subdomain_adj; 3154 IS new_ranks,ranks_send_to; 3155 MatPartitioning partitioner; 3156 Mat_IS *matis; 3157 PetscInt n_neighs,*neighs,*n_shared,**shared; 3158 PetscInt prank; 3159 PetscMPIInt size,rank,color; 3160 PetscInt *xadj,*adjncy,*oldranks; 3161 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3162 PetscInt i,local_size,threshold=0; 3163 PetscErrorCode ierr; 3164 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3165 PetscSubcomm subcomm; 3166 3167 PetscFunctionBegin; 3168 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3169 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3170 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3171 3172 /* Get info on mapping */ 3173 matis = (Mat_IS*)(mat->data); 3174 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3175 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3176 3177 /* build local CSR graph of subdomains' connectivity */ 3178 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3179 xadj[0] = 0; 3180 xadj[1] = PetscMax(n_neighs-1,0); 3181 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3182 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3183 3184 if (threshold) { 3185 PetscInt xadj_count = 0; 3186 for (i=1;i<n_neighs;i++) { 3187 if (n_shared[i] > threshold) { 3188 adjncy[xadj_count] = neighs[i]; 3189 adjncy_wgt[xadj_count] = n_shared[i]; 3190 xadj_count++; 3191 } 3192 } 3193 xadj[1] = xadj_count; 3194 } else { 3195 if (xadj[1]) { 3196 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3197 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3198 } 3199 } 3200 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3201 if (use_square) { 3202 for (i=0;i<xadj[1];i++) { 3203 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3204 } 3205 } 3206 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3207 3208 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3209 3210 /* 3211 Restrict work on active processes only. 3212 */ 3213 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3214 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3215 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3216 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3217 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3218 if (color) { 3219 ierr = PetscFree(xadj);CHKERRQ(ierr); 3220 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3221 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3222 } else { 3223 PetscInt coarsening_ratio; 3224 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3225 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3226 prank = rank; 3227 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3228 /* 3229 for (i=0;i<size;i++) { 3230 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3231 } 3232 */ 3233 for (i=0;i<xadj[1];i++) { 3234 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3235 } 3236 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3237 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3238 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3239 3240 /* Partition */ 3241 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3242 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3243 if (use_vwgt) { 3244 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3245 v_wgt[0] = local_size; 3246 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3247 } 3248 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3249 coarsening_ratio = size/n_subdomains; 3250 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3251 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3252 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3253 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3254 3255 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3256 if (contiguous) { 3257 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3258 } else { 3259 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3260 } 3261 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3262 /* clean up */ 3263 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3264 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3265 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3266 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3267 } 3268 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3269 3270 /* assemble parallel IS for sends */ 3271 i = 1; 3272 if (color) i=0; 3273 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3274 3275 /* get back IS */ 3276 *is_sends = ranks_send_to; 3277 PetscFunctionReturn(0); 3278 } 3279 3280 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3281 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatISSubassemble" 3284 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3285 { 3286 Mat local_mat; 3287 Mat_IS *matis; 3288 IS is_sends_internal; 3289 PetscInt rows,cols,new_local_rows; 3290 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3291 PetscBool ismatis,isdense,newisdense,destroy_mat; 3292 ISLocalToGlobalMapping l2gmap; 3293 PetscInt* l2gmap_indices; 3294 const PetscInt* is_indices; 3295 MatType new_local_type; 3296 /* buffers */ 3297 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3298 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3299 PetscInt *recv_buffer_idxs_local; 3300 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3301 /* MPI */ 3302 MPI_Comm comm,comm_n; 3303 PetscSubcomm subcomm; 3304 PetscMPIInt n_sends,n_recvs,commsize; 3305 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3306 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3307 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3308 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3309 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3310 PetscErrorCode ierr; 3311 3312 PetscFunctionBegin; 3313 /* TODO: add missing checks */ 3314 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3315 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3316 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3317 PetscValidLogicalCollectiveInt(mat,nis,7); 3318 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3319 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3320 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3321 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3322 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3323 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3324 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3325 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3326 PetscInt mrows,mcols,mnrows,mncols; 3327 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3328 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3329 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3330 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3331 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3332 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3333 } 3334 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3335 PetscValidLogicalCollectiveInt(mat,bs,0); 3336 /* prepare IS for sending if not provided */ 3337 if (!is_sends) { 3338 PetscBool pcontig = PETSC_TRUE; 3339 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3340 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3341 } else { 3342 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3343 is_sends_internal = is_sends; 3344 } 3345 3346 /* get pointer of MATIS data */ 3347 matis = (Mat_IS*)mat->data; 3348 3349 /* get comm */ 3350 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3351 3352 /* compute number of sends */ 3353 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3354 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3355 3356 /* compute number of receives */ 3357 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3358 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3359 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3360 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3361 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3362 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3363 ierr = PetscFree(iflags);CHKERRQ(ierr); 3364 3365 /* restrict comm if requested */ 3366 subcomm = 0; 3367 destroy_mat = PETSC_FALSE; 3368 if (restrict_comm) { 3369 PetscMPIInt color,subcommsize; 3370 3371 color = 0; 3372 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3373 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3374 subcommsize = commsize - subcommsize; 3375 /* check if reuse has been requested */ 3376 if (reuse == MAT_REUSE_MATRIX) { 3377 if (*mat_n) { 3378 PetscMPIInt subcommsize2; 3379 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3380 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3381 comm_n = PetscObjectComm((PetscObject)*mat_n); 3382 } else { 3383 comm_n = PETSC_COMM_SELF; 3384 } 3385 } else { /* MAT_INITIAL_MATRIX */ 3386 PetscMPIInt rank; 3387 3388 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3389 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3390 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3391 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3392 comm_n = PetscSubcommChild(subcomm); 3393 } 3394 /* flag to destroy *mat_n if not significative */ 3395 if (color) destroy_mat = PETSC_TRUE; 3396 } else { 3397 comm_n = comm; 3398 } 3399 3400 /* prepare send/receive buffers */ 3401 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3402 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3403 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3404 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3405 if (nis) { 3406 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3407 } 3408 3409 /* Get data from local matrices */ 3410 if (!isdense) { 3411 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3412 /* TODO: See below some guidelines on how to prepare the local buffers */ 3413 /* 3414 send_buffer_vals should contain the raw values of the local matrix 3415 send_buffer_idxs should contain: 3416 - MatType_PRIVATE type 3417 - PetscInt size_of_l2gmap 3418 - PetscInt global_row_indices[size_of_l2gmap] 3419 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3420 */ 3421 } else { 3422 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3423 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3424 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3425 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3426 send_buffer_idxs[1] = i; 3427 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3428 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3429 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3430 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3431 for (i=0;i<n_sends;i++) { 3432 ilengths_vals[is_indices[i]] = len*len; 3433 ilengths_idxs[is_indices[i]] = len+2; 3434 } 3435 } 3436 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3437 /* additional is (if any) */ 3438 if (nis) { 3439 PetscMPIInt psum; 3440 PetscInt j; 3441 for (j=0,psum=0;j<nis;j++) { 3442 PetscInt plen; 3443 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3444 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3445 psum += len+1; /* indices + lenght */ 3446 } 3447 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3448 for (j=0,psum=0;j<nis;j++) { 3449 PetscInt plen; 3450 const PetscInt *is_array_idxs; 3451 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3452 send_buffer_idxs_is[psum] = plen; 3453 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3454 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3455 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3456 psum += plen+1; /* indices + lenght */ 3457 } 3458 for (i=0;i<n_sends;i++) { 3459 ilengths_idxs_is[is_indices[i]] = psum; 3460 } 3461 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3462 } 3463 3464 buf_size_idxs = 0; 3465 buf_size_vals = 0; 3466 buf_size_idxs_is = 0; 3467 for (i=0;i<n_recvs;i++) { 3468 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3469 buf_size_vals += (PetscInt)olengths_vals[i]; 3470 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3471 } 3472 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3473 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3474 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3475 3476 /* get new tags for clean communications */ 3477 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3478 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3479 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3480 3481 /* allocate for requests */ 3482 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3483 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3484 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3485 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3486 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3487 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3488 3489 /* communications */ 3490 ptr_idxs = recv_buffer_idxs; 3491 ptr_vals = recv_buffer_vals; 3492 ptr_idxs_is = recv_buffer_idxs_is; 3493 for (i=0;i<n_recvs;i++) { 3494 source_dest = onodes[i]; 3495 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3496 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3497 ptr_idxs += olengths_idxs[i]; 3498 ptr_vals += olengths_vals[i]; 3499 if (nis) { 3500 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3501 ptr_idxs_is += olengths_idxs_is[i]; 3502 } 3503 } 3504 for (i=0;i<n_sends;i++) { 3505 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3506 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3507 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3508 if (nis) { 3509 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3510 } 3511 } 3512 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3513 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3514 3515 /* assemble new l2g map */ 3516 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3517 ptr_idxs = recv_buffer_idxs; 3518 new_local_rows = 0; 3519 for (i=0;i<n_recvs;i++) { 3520 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3521 ptr_idxs += olengths_idxs[i]; 3522 } 3523 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3524 ptr_idxs = recv_buffer_idxs; 3525 new_local_rows = 0; 3526 for (i=0;i<n_recvs;i++) { 3527 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3528 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3529 ptr_idxs += olengths_idxs[i]; 3530 } 3531 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3532 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3533 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3534 3535 /* infer new local matrix type from received local matrices type */ 3536 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3537 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3538 if (n_recvs) { 3539 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3540 ptr_idxs = recv_buffer_idxs; 3541 for (i=0;i<n_recvs;i++) { 3542 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3543 new_local_type_private = MATAIJ_PRIVATE; 3544 break; 3545 } 3546 ptr_idxs += olengths_idxs[i]; 3547 } 3548 switch (new_local_type_private) { 3549 case MATDENSE_PRIVATE: 3550 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3551 new_local_type = MATSEQAIJ; 3552 bs = 1; 3553 } else { /* if I receive only 1 dense matrix */ 3554 new_local_type = MATSEQDENSE; 3555 bs = 1; 3556 } 3557 break; 3558 case MATAIJ_PRIVATE: 3559 new_local_type = MATSEQAIJ; 3560 bs = 1; 3561 break; 3562 case MATBAIJ_PRIVATE: 3563 new_local_type = MATSEQBAIJ; 3564 break; 3565 case MATSBAIJ_PRIVATE: 3566 new_local_type = MATSEQSBAIJ; 3567 break; 3568 default: 3569 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3570 break; 3571 } 3572 } else { /* by default, new_local_type is seqdense */ 3573 new_local_type = MATSEQDENSE; 3574 bs = 1; 3575 } 3576 3577 /* create MATIS object if needed */ 3578 if (reuse == MAT_INITIAL_MATRIX) { 3579 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3580 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3581 } else { 3582 /* it also destroys the local matrices */ 3583 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3584 } 3585 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3586 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3587 3588 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3589 3590 /* Global to local map of received indices */ 3591 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3592 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3593 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3594 3595 /* restore attributes -> type of incoming data and its size */ 3596 buf_size_idxs = 0; 3597 for (i=0;i<n_recvs;i++) { 3598 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3599 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3600 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3601 } 3602 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3603 3604 /* set preallocation */ 3605 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3606 if (!newisdense) { 3607 PetscInt *new_local_nnz=0; 3608 3609 ptr_vals = recv_buffer_vals; 3610 ptr_idxs = recv_buffer_idxs_local; 3611 if (n_recvs) { 3612 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3613 } 3614 for (i=0;i<n_recvs;i++) { 3615 PetscInt j; 3616 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3617 for (j=0;j<*(ptr_idxs+1);j++) { 3618 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3619 } 3620 } else { 3621 /* TODO */ 3622 } 3623 ptr_idxs += olengths_idxs[i]; 3624 } 3625 if (new_local_nnz) { 3626 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3627 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3628 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3629 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3630 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3631 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3632 } else { 3633 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3634 } 3635 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3636 } else { 3637 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3638 } 3639 3640 /* set values */ 3641 ptr_vals = recv_buffer_vals; 3642 ptr_idxs = recv_buffer_idxs_local; 3643 for (i=0;i<n_recvs;i++) { 3644 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3645 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3646 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3647 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3649 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3650 } else { 3651 /* TODO */ 3652 } 3653 ptr_idxs += olengths_idxs[i]; 3654 ptr_vals += olengths_vals[i]; 3655 } 3656 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3657 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3658 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3659 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3660 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3661 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3662 3663 #if 0 3664 if (!restrict_comm) { /* check */ 3665 Vec lvec,rvec; 3666 PetscReal infty_error; 3667 3668 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3669 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3670 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3671 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3672 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3673 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3674 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3675 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3676 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3677 } 3678 #endif 3679 3680 /* assemble new additional is (if any) */ 3681 if (nis) { 3682 PetscInt **temp_idxs,*count_is,j,psum; 3683 3684 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3685 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3686 ptr_idxs = recv_buffer_idxs_is; 3687 psum = 0; 3688 for (i=0;i<n_recvs;i++) { 3689 for (j=0;j<nis;j++) { 3690 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3691 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3692 psum += plen; 3693 ptr_idxs += plen+1; /* shift pointer to received data */ 3694 } 3695 } 3696 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3697 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3698 for (i=1;i<nis;i++) { 3699 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3700 } 3701 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3702 ptr_idxs = recv_buffer_idxs_is; 3703 for (i=0;i<n_recvs;i++) { 3704 for (j=0;j<nis;j++) { 3705 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3706 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3707 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3708 ptr_idxs += plen+1; /* shift pointer to received data */ 3709 } 3710 } 3711 for (i=0;i<nis;i++) { 3712 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3713 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3714 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3715 } 3716 ierr = PetscFree(count_is);CHKERRQ(ierr); 3717 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3718 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3719 } 3720 /* free workspace */ 3721 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3722 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3723 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3724 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3725 if (isdense) { 3726 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3727 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3728 } else { 3729 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3730 } 3731 if (nis) { 3732 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3733 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3734 } 3735 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3736 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3737 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3738 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3739 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3740 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3741 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3742 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3743 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3744 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3745 ierr = PetscFree(onodes);CHKERRQ(ierr); 3746 if (nis) { 3747 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3748 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3749 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3750 } 3751 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3752 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3753 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3754 for (i=0;i<nis;i++) { 3755 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3756 } 3757 } 3758 PetscFunctionReturn(0); 3759 } 3760 3761 /* temporary hack into ksp private data structure */ 3762 #include <petsc/private/kspimpl.h> 3763 3764 #undef __FUNCT__ 3765 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3766 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3767 { 3768 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3769 PC_IS *pcis = (PC_IS*)pc->data; 3770 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3771 MatNullSpace CoarseNullSpace=NULL; 3772 ISLocalToGlobalMapping coarse_islg; 3773 IS coarse_is,*isarray; 3774 PetscInt i,im_active=-1,active_procs=-1; 3775 PetscInt nis,nisdofs,nisneu; 3776 PC pc_temp; 3777 PCType coarse_pc_type; 3778 KSPType coarse_ksp_type; 3779 PetscBool multilevel_requested,multilevel_allowed; 3780 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3781 Mat t_coarse_mat_is; 3782 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3783 PetscMPIInt all_procs; 3784 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3785 PetscBool compute_vecs = PETSC_FALSE; 3786 PetscScalar *array; 3787 PetscErrorCode ierr; 3788 3789 PetscFunctionBegin; 3790 /* Assign global numbering to coarse dofs */ 3791 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3792 PetscInt ocoarse_size; 3793 compute_vecs = PETSC_TRUE; 3794 ocoarse_size = pcbddc->coarse_size; 3795 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3796 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3797 /* see if we can avoid some work */ 3798 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3799 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3800 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3801 coarse_reuse = PETSC_FALSE; 3802 } else { /* we can safely reuse already computed coarse matrix */ 3803 coarse_reuse = PETSC_TRUE; 3804 } 3805 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3806 coarse_reuse = PETSC_FALSE; 3807 } 3808 /* reset any subassembling information */ 3809 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3810 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3811 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3812 coarse_reuse = PETSC_TRUE; 3813 } 3814 3815 /* count "active" (i.e. with positive local size) and "void" processes */ 3816 im_active = !!(pcis->n); 3817 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3818 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3819 void_procs = all_procs-active_procs; 3820 csin_type_simple = PETSC_TRUE; 3821 redist = PETSC_FALSE; 3822 if (pcbddc->current_level && void_procs) { 3823 csin_ml = PETSC_TRUE; 3824 ncoarse_ml = void_procs; 3825 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3826 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3827 csin_ds = PETSC_TRUE; 3828 ncoarse_ds = pcbddc->redistribute_coarse; 3829 redist = PETSC_TRUE; 3830 } else { 3831 csin_ds = PETSC_TRUE; 3832 ncoarse_ds = active_procs; 3833 redist = PETSC_TRUE; 3834 } 3835 } else { 3836 csin_ml = PETSC_FALSE; 3837 ncoarse_ml = all_procs; 3838 if (void_procs) { 3839 csin_ds = PETSC_TRUE; 3840 ncoarse_ds = void_procs; 3841 csin_type_simple = PETSC_FALSE; 3842 } else { 3843 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3844 csin_ds = PETSC_TRUE; 3845 ncoarse_ds = pcbddc->redistribute_coarse; 3846 redist = PETSC_TRUE; 3847 } else { 3848 csin_ds = PETSC_FALSE; 3849 ncoarse_ds = all_procs; 3850 } 3851 } 3852 } 3853 3854 /* 3855 test if we can go multilevel: three conditions must be satisfied: 3856 - we have not exceeded the number of levels requested 3857 - we can actually subassemble the active processes 3858 - we can find a suitable number of MPI processes where we can place the subassembled problem 3859 */ 3860 multilevel_allowed = PETSC_FALSE; 3861 multilevel_requested = PETSC_FALSE; 3862 if (pcbddc->current_level < pcbddc->max_levels) { 3863 multilevel_requested = PETSC_TRUE; 3864 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3865 multilevel_allowed = PETSC_FALSE; 3866 } else { 3867 multilevel_allowed = PETSC_TRUE; 3868 } 3869 } 3870 /* determine number of process partecipating to coarse solver */ 3871 if (multilevel_allowed) { 3872 ncoarse = ncoarse_ml; 3873 csin = csin_ml; 3874 redist = PETSC_FALSE; 3875 } else { 3876 ncoarse = ncoarse_ds; 3877 csin = csin_ds; 3878 } 3879 3880 /* creates temporary l2gmap and IS for coarse indexes */ 3881 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3882 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3883 3884 /* creates temporary MATIS object for coarse matrix */ 3885 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3886 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3887 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3888 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3889 #if 0 3890 { 3891 PetscViewer viewer; 3892 char filename[256]; 3893 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3894 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3895 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3896 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3897 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3898 } 3899 #endif 3900 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3901 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3902 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3903 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3904 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3905 3906 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3907 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3908 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3909 const PetscInt *idxs; 3910 ISLocalToGlobalMapping tmap; 3911 3912 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3913 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3914 /* allocate space for temporary storage */ 3915 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3916 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3917 /* allocate for IS array */ 3918 nisdofs = pcbddc->n_ISForDofsLocal; 3919 nisneu = !!pcbddc->NeumannBoundariesLocal; 3920 nis = nisdofs + nisneu; 3921 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3922 /* dofs splitting */ 3923 for (i=0;i<nisdofs;i++) { 3924 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3925 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3926 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3927 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3928 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3929 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3931 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3932 } 3933 /* neumann boundaries */ 3934 if (pcbddc->NeumannBoundariesLocal) { 3935 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3936 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3937 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3938 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3939 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3940 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3941 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3942 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3943 } 3944 /* free memory */ 3945 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3946 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3947 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3948 } else { 3949 nis = 0; 3950 nisdofs = 0; 3951 nisneu = 0; 3952 isarray = NULL; 3953 } 3954 /* destroy no longer needed map */ 3955 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3956 3957 /* restrict on coarse candidates (if needed) */ 3958 coarse_mat_is = NULL; 3959 if (csin) { 3960 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3961 if (redist) { 3962 PetscMPIInt rank; 3963 PetscInt spc,n_spc_p1,dest[1],destsize; 3964 3965 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3966 spc = active_procs/ncoarse; 3967 n_spc_p1 = active_procs%ncoarse; 3968 if (im_active) { 3969 destsize = 1; 3970 if (rank > n_spc_p1*(spc+1)-1) { 3971 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3972 } else { 3973 dest[0] = rank/(spc+1); 3974 } 3975 } else { 3976 destsize = 0; 3977 } 3978 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3979 } else if (csin_type_simple) { 3980 PetscMPIInt rank; 3981 PetscInt issize,isidx; 3982 3983 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3984 if (im_active) { 3985 issize = 1; 3986 isidx = (PetscInt)rank; 3987 } else { 3988 issize = 0; 3989 isidx = -1; 3990 } 3991 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3992 } else { /* get a suitable subassembling pattern from MATIS code */ 3993 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3994 } 3995 3996 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 3997 if (!redist || ncoarse <= void_procs) { 3998 PetscInt ncoarse_cand,tissize,*nisindices; 3999 PetscInt *coarse_candidates; 4000 const PetscInt* tisindices; 4001 4002 /* get coarse candidates' ranks in pc communicator */ 4003 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4004 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4005 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4006 if (!coarse_candidates[i]) { 4007 coarse_candidates[ncoarse_cand++]=i; 4008 } 4009 } 4010 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4011 4012 4013 if (pcbddc->dbg_flag) { 4014 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4015 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4016 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4017 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4018 for (i=0;i<ncoarse_cand;i++) { 4019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4020 } 4021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4022 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4023 } 4024 /* shift the pattern on coarse candidates */ 4025 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4026 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4027 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4028 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4029 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4030 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4031 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4032 } 4033 if (pcbddc->dbg_flag) { 4034 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4036 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4037 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4038 } 4039 } 4040 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4041 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4042 } else { 4043 if (pcbddc->dbg_flag) { 4044 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4046 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4047 } 4048 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4049 coarse_mat_is = t_coarse_mat_is; 4050 } 4051 4052 /* create local to global scatters for coarse problem */ 4053 if (compute_vecs) { 4054 PetscInt lrows; 4055 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4056 if (coarse_mat_is) { 4057 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4058 } else { 4059 lrows = 0; 4060 } 4061 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4062 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4063 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4064 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4065 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4066 } 4067 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4068 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4069 4070 /* set defaults for coarse KSP and PC */ 4071 if (multilevel_allowed) { 4072 coarse_ksp_type = KSPRICHARDSON; 4073 coarse_pc_type = PCBDDC; 4074 } else { 4075 coarse_ksp_type = KSPPREONLY; 4076 coarse_pc_type = PCREDUNDANT; 4077 } 4078 4079 /* print some info if requested */ 4080 if (pcbddc->dbg_flag) { 4081 if (!multilevel_allowed) { 4082 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4083 if (multilevel_requested) { 4084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4085 } else if (pcbddc->max_levels) { 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4087 } 4088 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4089 } 4090 } 4091 4092 /* create the coarse KSP object only once with defaults */ 4093 if (coarse_mat_is) { 4094 MatReuse coarse_mat_reuse; 4095 PetscViewer dbg_viewer = NULL; 4096 if (pcbddc->dbg_flag) { 4097 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4098 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4099 } 4100 if (!pcbddc->coarse_ksp) { 4101 char prefix[256],str_level[16]; 4102 size_t len; 4103 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4104 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4105 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4106 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4107 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4108 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4109 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4110 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4111 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4112 /* prefix */ 4113 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4114 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4115 if (!pcbddc->current_level) { 4116 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4117 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4118 } else { 4119 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4120 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4121 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4122 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4123 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4124 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4125 } 4126 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4127 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4128 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4129 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4130 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4131 /* allow user customization */ 4132 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4133 } 4134 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4135 if (nisdofs) { 4136 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4137 for (i=0;i<nisdofs;i++) { 4138 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4139 } 4140 } 4141 if (nisneu) { 4142 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4143 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4144 } 4145 4146 /* get some info after set from options */ 4147 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4148 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4149 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4150 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4151 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4152 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4153 isbddc = PETSC_FALSE; 4154 } 4155 if (isredundant) { 4156 KSP inner_ksp; 4157 PC inner_pc; 4158 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4159 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4160 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4161 } 4162 4163 /* assemble coarse matrix */ 4164 if (coarse_reuse) { 4165 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4166 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4167 coarse_mat_reuse = MAT_REUSE_MATRIX; 4168 } else { 4169 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4170 } 4171 if (isbddc || isnn) { 4172 if (pcbddc->coarsening_ratio > 1) { 4173 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4174 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4175 if (pcbddc->dbg_flag) { 4176 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4177 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4178 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4179 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4180 } 4181 } 4182 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4183 } else { 4184 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4185 coarse_mat = coarse_mat_is; 4186 } 4187 } else { 4188 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4189 } 4190 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4191 4192 /* propagate symmetry info to coarse matrix */ 4193 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4194 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4195 4196 /* set operators */ 4197 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4198 if (pcbddc->dbg_flag) { 4199 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4200 } 4201 } else { /* processes non partecipating to coarse solver (if any) */ 4202 coarse_mat = 0; 4203 } 4204 ierr = PetscFree(isarray);CHKERRQ(ierr); 4205 #if 0 4206 { 4207 PetscViewer viewer; 4208 char filename[256]; 4209 sprintf(filename,"coarse_mat.m"); 4210 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4211 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4212 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4213 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4214 } 4215 #endif 4216 4217 /* Compute coarse null space (special handling by BDDC only) */ 4218 if (pcbddc->NullSpace) { 4219 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4220 } 4221 4222 if (pcbddc->coarse_ksp) { 4223 Vec crhs,csol; 4224 PetscBool ispreonly; 4225 if (CoarseNullSpace) { 4226 if (isbddc) { 4227 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4228 } else { 4229 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4230 } 4231 } 4232 /* setup coarse ksp */ 4233 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4234 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4235 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4236 /* hack */ 4237 if (!csol) { 4238 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4239 } 4240 if (!crhs) { 4241 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4242 } 4243 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4244 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4245 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4246 KSP check_ksp; 4247 KSPType check_ksp_type; 4248 PC check_pc; 4249 Vec check_vec,coarse_vec; 4250 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4251 PetscInt its; 4252 PetscBool compute_eigs; 4253 PetscReal *eigs_r,*eigs_c; 4254 PetscInt neigs; 4255 const char *prefix; 4256 4257 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4258 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4259 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4260 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4261 if (ispreonly) { 4262 check_ksp_type = KSPPREONLY; 4263 compute_eigs = PETSC_FALSE; 4264 } else { 4265 check_ksp_type = KSPGMRES; 4266 compute_eigs = PETSC_TRUE; 4267 } 4268 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4269 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4270 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4271 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4272 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4273 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4274 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4275 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4276 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4277 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4278 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4279 /* create random vec */ 4280 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4281 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4282 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4283 if (CoarseNullSpace) { 4284 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4285 } 4286 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4287 /* solve coarse problem */ 4288 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4289 if (CoarseNullSpace) { 4290 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4291 } 4292 /* set eigenvalue estimation if preonly has not been requested */ 4293 if (compute_eigs) { 4294 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4295 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4296 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4297 lambda_max = eigs_r[neigs-1]; 4298 lambda_min = eigs_r[0]; 4299 if (pcbddc->use_coarse_estimates) { 4300 if (lambda_max>lambda_min) { 4301 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4302 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4303 } 4304 } 4305 } 4306 4307 /* check coarse problem residual error */ 4308 if (pcbddc->dbg_flag) { 4309 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4310 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4311 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4312 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4313 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4314 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4315 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4316 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4317 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4318 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4319 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4320 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4321 if (compute_eigs) { 4322 PetscReal lambda_max_s,lambda_min_s; 4323 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4324 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4325 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4326 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4327 for (i=0;i<neigs;i++) { 4328 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4329 } 4330 } 4331 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4332 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4333 } 4334 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4335 if (compute_eigs) { 4336 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4337 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4338 } 4339 } 4340 } 4341 /* print additional info */ 4342 if (pcbddc->dbg_flag) { 4343 /* waits until all processes reaches this point */ 4344 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4345 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4346 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4347 } 4348 4349 /* free memory */ 4350 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4351 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4352 PetscFunctionReturn(0); 4353 } 4354 4355 #undef __FUNCT__ 4356 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4357 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4358 { 4359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4360 PC_IS* pcis = (PC_IS*)pc->data; 4361 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4362 PetscInt i,coarse_size=0; 4363 PetscInt *local_primal_indices=NULL; 4364 PetscErrorCode ierr; 4365 4366 PetscFunctionBegin; 4367 /* Compute global number of coarse dofs */ 4368 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4369 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4370 } 4371 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4372 4373 /* check numbering */ 4374 if (pcbddc->dbg_flag) { 4375 PetscScalar coarsesum,*array; 4376 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4377 4378 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4379 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4380 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4381 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4382 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4383 for (i=0;i<pcbddc->local_primal_size;i++) { 4384 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4385 } 4386 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4387 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4388 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4389 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4390 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4391 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4392 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4393 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4394 for (i=0;i<pcis->n;i++) { 4395 if (array[i] == 1.0) { 4396 set_error = PETSC_TRUE; 4397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4398 } 4399 } 4400 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4401 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4402 for (i=0;i<pcis->n;i++) { 4403 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4404 } 4405 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4406 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4407 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4408 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4409 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4410 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4411 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4412 PetscInt *gidxs; 4413 4414 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4415 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4416 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4419 for (i=0;i<pcbddc->local_primal_size;i++) { 4420 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4421 } 4422 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4423 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4424 } 4425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4426 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4427 } 4428 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4429 /* get back data */ 4430 *coarse_size_n = coarse_size; 4431 *local_primal_indices_n = local_primal_indices; 4432 PetscFunctionReturn(0); 4433 } 4434 4435 #undef __FUNCT__ 4436 #define __FUNCT__ "PCBDDCGlobalToLocal" 4437 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4438 { 4439 IS localis_t; 4440 PetscInt i,lsize,*idxs,n; 4441 PetscScalar *vals; 4442 PetscErrorCode ierr; 4443 4444 PetscFunctionBegin; 4445 /* get indices in local ordering exploiting local to global map */ 4446 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4447 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4448 for (i=0;i<lsize;i++) vals[i] = 1.0; 4449 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4450 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4451 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4452 if (idxs) { /* multilevel guard */ 4453 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4454 } 4455 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4456 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4457 ierr = PetscFree(vals);CHKERRQ(ierr); 4458 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4459 /* now compute set in local ordering */ 4460 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4461 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4462 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4463 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4464 for (i=0,lsize=0;i<n;i++) { 4465 if (PetscRealPart(vals[i]) > 0.5) { 4466 lsize++; 4467 } 4468 } 4469 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4470 for (i=0,lsize=0;i<n;i++) { 4471 if (PetscRealPart(vals[i]) > 0.5) { 4472 idxs[lsize++] = i; 4473 } 4474 } 4475 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4476 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4477 *localis = localis_t; 4478 PetscFunctionReturn(0); 4479 } 4480 4481 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4482 #undef __FUNCT__ 4483 #define __FUNCT__ "PCBDDCMatMult_Private" 4484 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4485 { 4486 PCBDDCChange_ctx change_ctx; 4487 PetscErrorCode ierr; 4488 4489 PetscFunctionBegin; 4490 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4491 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4492 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4493 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4494 PetscFunctionReturn(0); 4495 } 4496 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4499 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4500 { 4501 PCBDDCChange_ctx change_ctx; 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4506 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4507 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4508 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4514 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4515 { 4516 PC_IS *pcis=(PC_IS*)pc->data; 4517 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4518 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4519 Mat S_j; 4520 PetscInt *used_xadj,*used_adjncy; 4521 PetscBool free_used_adj; 4522 PetscErrorCode ierr; 4523 4524 PetscFunctionBegin; 4525 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4526 free_used_adj = PETSC_FALSE; 4527 if (pcbddc->sub_schurs_layers == -1) { 4528 used_xadj = NULL; 4529 used_adjncy = NULL; 4530 } else { 4531 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4532 used_xadj = pcbddc->mat_graph->xadj; 4533 used_adjncy = pcbddc->mat_graph->adjncy; 4534 } else if (pcbddc->computed_rowadj) { 4535 used_xadj = pcbddc->mat_graph->xadj; 4536 used_adjncy = pcbddc->mat_graph->adjncy; 4537 } else { 4538 PetscBool flg_row=PETSC_FALSE; 4539 const PetscInt *xadj,*adjncy; 4540 PetscInt nvtxs; 4541 4542 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4543 if (flg_row) { 4544 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4545 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4546 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4547 free_used_adj = PETSC_TRUE; 4548 } else { 4549 pcbddc->sub_schurs_layers = -1; 4550 used_xadj = NULL; 4551 used_adjncy = NULL; 4552 } 4553 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4554 } 4555 } 4556 4557 /* setup sub_schurs data */ 4558 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4559 if (!sub_schurs->use_mumps) { 4560 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4561 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4562 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4563 } else { 4564 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4565 } 4566 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4567 4568 /* free adjacency */ 4569 if (free_used_adj) { 4570 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4571 } 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "PCBDDCInitSubSchurs" 4577 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4578 { 4579 PC_IS *pcis=(PC_IS*)pc->data; 4580 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4581 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4582 PCBDDCGraph graph; 4583 PetscErrorCode ierr; 4584 4585 PetscFunctionBegin; 4586 /* attach interface graph for determining subsets */ 4587 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4588 IS verticesIS; 4589 4590 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4591 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4592 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4593 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4594 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4595 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4596 /* 4597 if (pcbddc->dbg_flag) { 4598 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4599 } 4600 */ 4601 } else { 4602 graph = pcbddc->mat_graph; 4603 } 4604 4605 /* sub_schurs init */ 4606 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4607 4608 /* free graph struct */ 4609 if (pcbddc->sub_schurs_rebuild) { 4610 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4611 } 4612 PetscFunctionReturn(0); 4613 } 4614