1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 /* submatrices of local problem */ 519 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 520 /* submatrices of local coarse problem */ 521 Mat S_VV,S_CV,S_VC,S_CC; 522 /* working matrices */ 523 Mat C_CR; 524 /* additional working stuff */ 525 PC pc_R; 526 Mat F; 527 PetscBool isLU,isCHOL,isILU; 528 529 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 530 PetscScalar *work; 531 PetscInt *idx_V_B; 532 PetscInt n,n_vertices,n_constraints; 533 PetscInt i,n_R,n_D,n_B; 534 PetscBool unsymmetric_check; 535 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 536 MatType impMatType; 537 /* some shortcuts to scalars */ 538 PetscScalar one=1.0,m_one=-1.0; 539 540 PetscFunctionBegin; 541 n_vertices = pcbddc->n_vertices; 542 n_constraints = pcbddc->local_primal_size-n_vertices; 543 /* Set Non-overlapping dimensions */ 544 n_B = pcis->n_B; 545 n_D = pcis->n - n_B; 546 n_R = pcis->n - n_vertices; 547 548 /* Set types for local objects needed by BDDC precondtioner */ 549 impMatType = MATSEQDENSE; 550 551 /* vertices in boundary numbering */ 552 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 553 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 554 if (i != n_vertices) { 555 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 556 } 557 558 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 559 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 561 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 563 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 568 569 unsymmetric_check = PETSC_FALSE; 570 /* allocate workspace */ 571 n = 0; 572 if (n_constraints) { 573 n += n_R*n_constraints; 574 } 575 if (n_vertices) { 576 n = PetscMax(2*n_R*n_vertices,n); 577 n = PetscMax((n_R+n_B)*n_vertices,n); 578 } 579 if (!pcbddc->issym) { 580 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 581 unsymmetric_check = PETSC_TRUE; 582 } 583 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 584 585 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 586 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 590 if (isLU || isILU || isCHOL) { 591 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 592 } else { 593 F = NULL; 594 } 595 596 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 597 if (n_constraints) { 598 Mat M1,M2,M3; 599 Mat auxmat; 600 IS is_aux; 601 PetscScalar *array,*array2; 602 603 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 605 606 /* Extract constraints on R nodes: C_{CR} */ 607 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 608 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 609 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 610 611 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 612 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 613 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 614 for (i=0;i<n_constraints;i++) { 615 const PetscScalar *row_cmat_values; 616 const PetscInt *row_cmat_indices; 617 PetscInt size_of_constraint,j; 618 619 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 620 for (j=0;j<size_of_constraint;j++) { 621 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 622 } 623 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 624 } 625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 626 if (F) { 627 Mat B; 628 629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 630 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 631 ierr = MatDestroy(&B);CHKERRQ(ierr); 632 } else { 633 PetscScalar *marr; 634 635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 636 for (i=0;i<n_constraints;i++) { 637 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 638 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 639 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 640 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 641 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 642 } 643 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 } 645 if (!pcbddc->switch_static) { 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 648 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 654 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 } 657 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 658 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 659 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 660 } else { 661 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 662 pcbddc->local_auxmat2 = local_auxmat2_R; 663 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 664 } 665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 666 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 667 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 668 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 669 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 670 if (isCHOL) { 671 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 672 } else { 673 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 674 } 675 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 676 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 677 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 678 ierr = MatDestroy(&M2);CHKERRQ(ierr); 679 ierr = MatDestroy(&M3);CHKERRQ(ierr); 680 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 681 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 682 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 683 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 684 ierr = MatDestroy(&M1);CHKERRQ(ierr); 685 } 686 /* Get submatrices from subdomain matrix */ 687 if (n_vertices) { 688 Mat newmat; 689 IS is_aux; 690 PetscInt ibs,mbs; 691 PetscBool issbaij; 692 693 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 694 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 695 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 696 if (ibs != mbs) { /* need to convert to SEQAIJ */ 697 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 698 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 699 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 700 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 701 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 702 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 703 } else { 704 /* this is safe */ 705 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 706 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 707 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 708 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 709 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 710 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 711 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 712 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 713 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 714 } else { 715 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 717 } 718 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 719 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 720 } 721 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 722 } 723 724 /* Matrix of coarse basis functions (local) */ 725 if (pcbddc->coarse_phi_B) { 726 PetscInt on_B,on_primal,on_D=n_D; 727 if (pcbddc->coarse_phi_D) { 728 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 729 } 730 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 731 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 732 PetscScalar *marray; 733 734 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 735 ierr = PetscFree(marray);CHKERRQ(ierr); 736 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 737 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 738 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 739 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 740 } 741 } 742 743 if (!pcbddc->coarse_phi_B) { 744 PetscScalar *marray; 745 746 n = n_B*pcbddc->local_primal_size; 747 if (pcbddc->switch_static || pcbddc->dbg_flag) { 748 n += n_D*pcbddc->local_primal_size; 749 } 750 if (!pcbddc->issym) { 751 n *= 2; 752 } 753 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 755 n = n_B*pcbddc->local_primal_size; 756 if (pcbddc->switch_static || pcbddc->dbg_flag) { 757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 758 n += n_D*pcbddc->local_primal_size; 759 } 760 if (!pcbddc->issym) { 761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 762 if (pcbddc->switch_static || pcbddc->dbg_flag) { 763 n = n_B*pcbddc->local_primal_size; 764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 765 } 766 } else { 767 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 768 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 770 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 771 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 772 } 773 } 774 } 775 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 776 /* vertices */ 777 if (n_vertices) { 778 779 if (n_R) { 780 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 781 PetscBLASInt B_N,B_one = 1; 782 PetscScalar *x,*y; 783 784 ierr = PetscMemzero(work,n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 786 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 787 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 788 if (F) { 789 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 790 } else { 791 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 792 for (i=0;i<n_vertices;i++) { 793 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 794 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 795 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 796 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 797 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 798 } 799 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 800 } 801 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 802 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 803 if (n_constraints) { 804 Mat B; 805 806 for (i=0;i<n_vertices;i++) { 807 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 808 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 809 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 810 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 811 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 812 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 813 } 814 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 815 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 816 ierr = MatDestroy(&B);CHKERRQ(ierr); 817 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 818 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 819 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 820 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 821 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 822 ierr = MatDestroy(&B);CHKERRQ(ierr); 823 } 824 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 825 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 826 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 827 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 828 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 829 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 830 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 831 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 832 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 833 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 834 } else { 835 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 836 } 837 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 838 /* coarse basis functions */ 839 for (i=0;i<n_vertices;i++) { 840 PetscScalar *y; 841 842 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 843 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 844 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 845 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 y[n_B*i+idx_V_B[i]] = 1.0; 848 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 849 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 850 851 if (pcbddc->switch_static || pcbddc->dbg_flag) { 852 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 853 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 854 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 857 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 858 } 859 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 860 } 861 } 862 863 if (n_constraints) { 864 Mat B; 865 866 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 867 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 868 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 869 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 870 if (n_vertices) { 871 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 872 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 873 } else { 874 Mat S_VCt; 875 876 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 877 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 878 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 879 } 880 } 881 ierr = MatDestroy(&B);CHKERRQ(ierr); 882 /* coarse basis functions */ 883 for (i=0;i<n_constraints;i++) { 884 PetscScalar *y; 885 886 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 887 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 888 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 889 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 892 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 893 if (pcbddc->switch_static || pcbddc->dbg_flag) { 894 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 895 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 896 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 898 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 899 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 900 } 901 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 902 } 903 } 904 if (n_constraints) { 905 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 906 } 907 908 /* compute other basis functions for non-symmetric problems */ 909 if (!pcbddc->issym) { 910 911 if (n_constraints) { 912 Mat S_CCT,B_C; 913 914 /* this is a lazy thing */ 915 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 917 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 918 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 919 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 920 if (n_vertices) { 921 Mat B_V,S_VCT; 922 923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 924 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 925 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 926 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 927 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 928 } 929 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 930 } else { /* if there are no constraints, reset work */ 931 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 932 } 933 if (n_vertices && n_R) { 934 Mat A_VRT; 935 PetscScalar *marray; 936 PetscBLASInt B_N,B_one = 1; 937 938 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 939 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 940 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 941 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 942 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 943 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 944 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 945 } 946 947 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 948 for (i=0;i<pcbddc->local_primal_size;i++) { 949 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 950 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 951 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 952 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 953 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 954 } 955 } else { 956 for (i=0;i<pcbddc->local_primal_size;i++) { 957 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 958 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 959 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 961 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 962 } 963 } 964 /* coarse basis functions */ 965 for (i=0;i<pcbddc->local_primal_size;i++) { 966 PetscScalar *y; 967 968 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 969 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 971 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 if (i<n_vertices) { 974 y[n_B*i+idx_V_B[i]] = 1.0; 975 } 976 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 977 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 978 979 if (pcbddc->switch_static || pcbddc->dbg_flag) { 980 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 981 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 982 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 985 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 986 } 987 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 988 } 989 } 990 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 991 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 995 /* Checking coarse_sub_mat and coarse basis functios */ 996 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 997 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 998 if (pcbddc->dbg_flag) { 999 Mat coarse_sub_mat; 1000 Mat AUXMAT,TM1,TM2,TM3,TM4; 1001 Mat coarse_phi_D,coarse_phi_B; 1002 Mat coarse_psi_D,coarse_psi_B; 1003 Mat A_II,A_BB,A_IB,A_BI; 1004 Mat C_B,CPHI; 1005 IS is_dummy; 1006 Vec mones; 1007 MatType checkmattype=MATSEQAIJ; 1008 PetscReal real_value; 1009 1010 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1011 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1012 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1013 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1014 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1015 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1016 if (unsymmetric_check) { 1017 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1018 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1019 } 1020 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1021 1022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1024 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1025 if (unsymmetric_check) { 1026 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1027 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1028 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1029 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1030 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1031 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1032 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1033 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1034 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1035 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1037 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1038 } else { 1039 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1040 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1041 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1042 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1043 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1044 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1045 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1046 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1047 } 1048 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1049 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1050 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1051 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1052 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1053 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1054 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1056 1057 /* check constraints */ 1058 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1059 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1060 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1061 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1062 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1063 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1064 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1065 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1066 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1067 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1068 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1069 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1070 1071 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1072 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1073 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1074 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1075 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1076 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1077 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1078 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1079 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1080 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1081 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1082 if (unsymmetric_check) { 1083 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1084 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1085 } 1086 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1087 } 1088 /* free memory */ 1089 ierr = PetscFree(work);CHKERRQ(ierr); 1090 if (n_vertices) { 1091 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1092 } 1093 if (n_constraints) { 1094 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1095 } 1096 /* get back data */ 1097 *coarse_submat_vals_n = coarse_submat_vals; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1103 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1104 { 1105 Mat *work_mat; 1106 IS isrow_s,iscol_s; 1107 PetscBool rsorted,csorted; 1108 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1113 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1114 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1115 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1116 1117 if (!rsorted) { 1118 const PetscInt *idxs; 1119 PetscInt *idxs_sorted,i; 1120 1121 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1122 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1123 for (i=0;i<rsize;i++) { 1124 idxs_perm_r[i] = i; 1125 } 1126 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1127 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1128 for (i=0;i<rsize;i++) { 1129 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1130 } 1131 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1133 } else { 1134 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1135 isrow_s = isrow; 1136 } 1137 1138 if (!csorted) { 1139 if (isrow == iscol) { 1140 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1141 iscol_s = isrow_s; 1142 } else { 1143 const PetscInt *idxs; 1144 PetscInt *idxs_sorted,i; 1145 1146 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1147 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1148 for (i=0;i<csize;i++) { 1149 idxs_perm_c[i] = i; 1150 } 1151 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1152 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1153 for (i=0;i<csize;i++) { 1154 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1155 } 1156 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1157 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1158 } 1159 } else { 1160 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1161 iscol_s = iscol; 1162 } 1163 1164 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1165 1166 if (!rsorted || !csorted) { 1167 Mat new_mat; 1168 IS is_perm_r,is_perm_c; 1169 1170 if (!rsorted) { 1171 PetscInt *idxs_r,i; 1172 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1173 for (i=0;i<rsize;i++) { 1174 idxs_r[idxs_perm_r[i]] = i; 1175 } 1176 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1177 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1178 } else { 1179 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1180 } 1181 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1182 1183 if (!csorted) { 1184 if (isrow_s == iscol_s) { 1185 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1186 is_perm_c = is_perm_r; 1187 } else { 1188 PetscInt *idxs_c,i; 1189 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1190 for (i=0;i<csize;i++) { 1191 idxs_c[idxs_perm_c[i]] = i; 1192 } 1193 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1195 } 1196 } else { 1197 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1198 } 1199 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1200 1201 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1202 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1203 work_mat[0] = new_mat; 1204 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1205 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1206 } 1207 1208 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1209 *B = work_mat[0]; 1210 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1211 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1212 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1218 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1219 { 1220 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1221 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1222 Mat new_mat; 1223 IS is_local,is_global; 1224 PetscInt local_size; 1225 PetscBool isseqaij; 1226 PetscErrorCode ierr; 1227 1228 PetscFunctionBegin; 1229 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1230 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1231 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1232 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1233 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1234 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1235 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1236 1237 /* check */ 1238 if (pcbddc->dbg_flag) { 1239 Vec x,x_change; 1240 PetscReal error; 1241 1242 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1243 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1244 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1245 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1248 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1251 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1252 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1253 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1254 ierr = VecDestroy(&x);CHKERRQ(ierr); 1255 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1256 } 1257 1258 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1259 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1260 if (isseqaij) { 1261 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1262 } else { 1263 Mat work_mat; 1264 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1265 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1266 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1267 } 1268 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1269 #if !defined(PETSC_USE_COMPLEX) 1270 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1271 #endif 1272 /* 1273 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1274 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1275 */ 1276 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1282 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1283 { 1284 PC_IS* pcis = (PC_IS*)(pc->data); 1285 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1286 IS is_aux1,is_aux2; 1287 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1288 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1289 PetscInt vbs,bs; 1290 PetscBT bitmask; 1291 PetscErrorCode ierr; 1292 1293 PetscFunctionBegin; 1294 /* 1295 No need to setup local scatters if 1296 - primal space is unchanged 1297 AND 1298 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1299 AND 1300 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1301 */ 1302 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1303 PetscFunctionReturn(0); 1304 } 1305 /* destroy old objects */ 1306 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1307 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1308 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1309 /* Set Non-overlapping dimensions */ 1310 n_B = pcis->n_B; 1311 n_D = pcis->n - n_B; 1312 n_vertices = pcbddc->n_vertices; 1313 /* create auxiliary bitmask */ 1314 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1315 for (i=0;i<n_vertices;i++) { 1316 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1317 } 1318 1319 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1320 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1321 for (i=0, n_R=0; i<pcis->n; i++) { 1322 if (!PetscBTLookup(bitmask,i)) { 1323 idx_R_local[n_R] = i; 1324 n_R++; 1325 } 1326 } 1327 1328 /* Block code */ 1329 vbs = 1; 1330 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1331 if (bs>1 && !(n_vertices%bs)) { 1332 PetscBool is_blocked = PETSC_TRUE; 1333 PetscInt *vary; 1334 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1335 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1336 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1337 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1338 for (i=0; i<n_vertices/bs; i++) { 1339 if (vary[i]!=0 && vary[i]!=bs) { 1340 is_blocked = PETSC_FALSE; 1341 break; 1342 } 1343 } 1344 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1345 vbs = bs; 1346 for (i=0;i<n_R/vbs;i++) { 1347 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1348 } 1349 } 1350 ierr = PetscFree(vary);CHKERRQ(ierr); 1351 } 1352 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1353 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1354 1355 /* print some info if requested */ 1356 if (pcbddc->dbg_flag) { 1357 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1358 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1363 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1364 } 1365 1366 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1367 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1368 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1369 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1370 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1371 for (i=0; i<n_D; i++) { 1372 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1373 } 1374 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1375 for (i=0, j=0; i<n_R; i++) { 1376 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1377 aux_array1[j++] = i; 1378 } 1379 } 1380 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1381 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1382 for (i=0, j=0; i<n_B; i++) { 1383 if (!PetscBTLookup(bitmask,is_indices[i])) { 1384 aux_array2[j++] = i; 1385 } 1386 } 1387 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1388 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1389 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1390 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1392 1393 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1394 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1395 for (i=0, j=0; i<n_R; i++) { 1396 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1397 aux_array1[j++] = i; 1398 } 1399 } 1400 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1401 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1402 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1403 } 1404 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1405 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1412 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1413 { 1414 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1415 PC_IS *pcis = (PC_IS*)pc->data; 1416 PC pc_temp; 1417 Mat A_RR; 1418 MatReuse reuse; 1419 PetscScalar m_one = -1.0; 1420 PetscReal value; 1421 PetscInt n_D,n_R,ibs,mbs; 1422 PetscBool use_exact,use_exact_reduced,issbaij; 1423 PetscErrorCode ierr; 1424 /* prefixes stuff */ 1425 char dir_prefix[256],neu_prefix[256],str_level[16]; 1426 size_t len; 1427 1428 PetscFunctionBegin; 1429 1430 /* compute prefixes */ 1431 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1432 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1433 if (!pcbddc->current_level) { 1434 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1435 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1436 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1437 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1438 } else { 1439 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1440 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1441 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1442 len -= 15; /* remove "pc_bddc_coarse_" */ 1443 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1444 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1445 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1446 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1447 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1448 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1449 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1450 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1451 } 1452 1453 /* DIRICHLET PROBLEM */ 1454 if (dirichlet) { 1455 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1456 if (pcbddc->issym) { 1457 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1458 } 1459 /* Matrix for Dirichlet problem is pcis->A_II */ 1460 n_D = pcis->n - pcis->n_B; 1461 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1462 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1463 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1464 /* default */ 1465 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1466 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1467 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1468 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1469 if (issbaij) { 1470 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1471 } else { 1472 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1473 } 1474 /* Allow user's customization */ 1475 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1476 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1477 } 1478 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1479 if (sub_schurs->interior_solver) { 1480 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1481 } 1482 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1483 if (!n_D) { 1484 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1485 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1486 } 1487 /* Set Up KSP for Dirichlet problem of BDDC */ 1488 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1489 /* set ksp_D into pcis data */ 1490 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1491 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1492 pcis->ksp_D = pcbddc->ksp_D; 1493 } 1494 1495 /* NEUMANN PROBLEM */ 1496 A_RR = 0; 1497 if (neumann) { 1498 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1499 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1500 if (pcbddc->ksp_R) { /* already created ksp */ 1501 PetscInt nn_R; 1502 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1503 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1504 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1505 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1506 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1507 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1508 reuse = MAT_INITIAL_MATRIX; 1509 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1510 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1511 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1512 reuse = MAT_INITIAL_MATRIX; 1513 } else { /* safe to reuse the matrix */ 1514 reuse = MAT_REUSE_MATRIX; 1515 } 1516 } 1517 /* last check */ 1518 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1519 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1520 reuse = MAT_INITIAL_MATRIX; 1521 } 1522 } else { /* first time, so we need to create the matrix */ 1523 reuse = MAT_INITIAL_MATRIX; 1524 } 1525 /* extract A_RR */ 1526 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1527 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1528 if (ibs != mbs) { 1529 Mat newmat; 1530 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1531 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1532 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1533 } else { 1534 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1535 } 1536 if (pcbddc->issym) { 1537 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1538 } 1539 if (!pcbddc->ksp_R) { /* create object if not present */ 1540 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1541 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1542 /* default */ 1543 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1544 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1545 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1546 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1547 if (issbaij) { 1548 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1549 } else { 1550 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1551 } 1552 /* Allow user's customization */ 1553 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1554 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1555 } 1556 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1557 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1558 if (!n_R) { 1559 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1560 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1561 } 1562 /* Set Up KSP for Neumann problem of BDDC */ 1563 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1564 } 1565 1566 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1567 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1568 if (pcbddc->dbg_flag) { 1569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1570 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1572 } 1573 if (dirichlet) { /* Dirichlet */ 1574 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1575 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1576 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1577 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1578 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1579 /* need to be adapted? */ 1580 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1581 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1582 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1583 /* print info */ 1584 if (pcbddc->dbg_flag) { 1585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1586 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1587 } 1588 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1589 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1590 } 1591 } 1592 if (neumann) { /* Neumann */ 1593 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1594 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1595 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1596 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1597 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1598 /* need to be adapted? */ 1599 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1600 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1601 /* print info */ 1602 if (pcbddc->dbg_flag) { 1603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1605 } 1606 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1607 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1608 } 1609 } 1610 } 1611 /* free Neumann problem's matrix */ 1612 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1618 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1619 { 1620 PetscErrorCode ierr; 1621 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1622 1623 PetscFunctionBegin; 1624 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1625 if (!pcbddc->switch_static) { 1626 if (applytranspose && pcbddc->local_auxmat1) { 1627 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1628 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1629 } 1630 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1631 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1632 } else { 1633 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1637 if (applytranspose && pcbddc->local_auxmat1) { 1638 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1639 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1640 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1642 } 1643 } 1644 if (applytranspose) { 1645 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1646 } else { 1647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1648 } 1649 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1650 if (!pcbddc->switch_static) { 1651 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 if (!applytranspose && pcbddc->local_auxmat1) { 1654 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1655 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1656 } 1657 } else { 1658 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 if (!applytranspose && pcbddc->local_auxmat1) { 1663 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1664 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1665 } 1666 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1677 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1678 { 1679 PetscErrorCode ierr; 1680 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1681 PC_IS* pcis = (PC_IS*) (pc->data); 1682 const PetscScalar zero = 0.0; 1683 1684 PetscFunctionBegin; 1685 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1686 if (applytranspose) { 1687 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1688 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1689 } else { 1690 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1691 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1692 } 1693 /* start communications from local primal nodes to rhs of coarse solver */ 1694 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1695 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1696 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1697 1698 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1699 /* TODO remove null space when doing multilevel */ 1700 if (pcbddc->coarse_ksp) { 1701 Vec rhs,sol; 1702 1703 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1705 if (applytranspose) { 1706 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1707 } else { 1708 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1709 } 1710 } 1711 1712 /* Local solution on R nodes */ 1713 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1714 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1715 } 1716 1717 /* communications from coarse sol to local primal nodes */ 1718 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1719 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 1721 /* Sum contributions from two levels */ 1722 if (applytranspose) { 1723 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1724 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1725 } else { 1726 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1727 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1728 } 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1734 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1735 { 1736 PetscErrorCode ierr; 1737 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1738 PetscScalar *array; 1739 Vec from,to; 1740 1741 PetscFunctionBegin; 1742 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1743 from = pcbddc->coarse_vec; 1744 to = pcbddc->vec1_P; 1745 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1746 Vec tvec; 1747 1748 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1749 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1750 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1751 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1752 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1753 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1754 } 1755 } else { /* from local to global -> put data in coarse right hand side */ 1756 from = pcbddc->vec1_P; 1757 to = pcbddc->coarse_vec; 1758 } 1759 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1765 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1766 { 1767 PetscErrorCode ierr; 1768 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1769 PetscScalar *array; 1770 Vec from,to; 1771 1772 PetscFunctionBegin; 1773 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1774 from = pcbddc->coarse_vec; 1775 to = pcbddc->vec1_P; 1776 } else { /* from local to global -> put data in coarse right hand side */ 1777 from = pcbddc->vec1_P; 1778 to = pcbddc->coarse_vec; 1779 } 1780 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1781 if (smode == SCATTER_FORWARD) { 1782 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1783 Vec tvec; 1784 1785 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1786 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1787 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1788 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1789 } 1790 } else { 1791 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1792 ierr = VecResetArray(from);CHKERRQ(ierr); 1793 } 1794 } 1795 PetscFunctionReturn(0); 1796 } 1797 1798 /* uncomment for testing purposes */ 1799 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1802 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1803 { 1804 PetscErrorCode ierr; 1805 PC_IS* pcis = (PC_IS*)(pc->data); 1806 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1807 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1808 /* one and zero */ 1809 PetscScalar one=1.0,zero=0.0; 1810 /* space to store constraints and their local indices */ 1811 PetscScalar *constraints_data; 1812 PetscInt *constraints_idxs,*constraints_idxs_B; 1813 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1814 PetscInt *constraints_n; 1815 /* iterators */ 1816 PetscInt i,j,k,total_counts,total_counts_cc; 1817 /* BLAS integers */ 1818 PetscBLASInt lwork,lierr; 1819 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1820 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1821 /* reuse */ 1822 PetscInt olocal_primal_size,olocal_primal_size_cc; 1823 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1824 /* change of basis */ 1825 PetscBool boolforchange,qr_needed; 1826 PetscBT change_basis,qr_needed_idx; 1827 /* auxiliary stuff */ 1828 PetscInt *nnz,*is_indices; 1829 PetscInt ncc; 1830 /* some quantities */ 1831 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1832 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1833 1834 PetscFunctionBegin; 1835 /* Destroy Mat objects computed previously */ 1836 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1837 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1838 /* save info on constraints from previous setup (if any) */ 1839 olocal_primal_size = pcbddc->local_primal_size; 1840 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1841 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1842 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1843 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1844 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1845 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1846 1847 /* print some info */ 1848 if (pcbddc->dbg_flag) { 1849 IS vertices; 1850 PetscInt nv,nedges,nfaces; 1851 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1852 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1853 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1854 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1855 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1858 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1859 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1860 } 1861 1862 if (!pcbddc->adaptive_selection) { 1863 IS ISForVertices,*ISForFaces,*ISForEdges; 1864 MatNullSpace nearnullsp; 1865 const Vec *nearnullvecs; 1866 Vec *localnearnullsp; 1867 PetscScalar *array; 1868 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1869 PetscBool nnsp_has_cnst; 1870 /* LAPACK working arrays for SVD or POD */ 1871 PetscBool skip_lapack; 1872 PetscScalar *work; 1873 PetscReal *singular_vals; 1874 #if defined(PETSC_USE_COMPLEX) 1875 PetscReal *rwork; 1876 #endif 1877 #if defined(PETSC_MISSING_LAPACK_GESVD) 1878 PetscScalar *temp_basis,*correlation_mat; 1879 #else 1880 PetscBLASInt dummy_int=1; 1881 PetscScalar dummy_scalar=1.; 1882 #endif 1883 1884 /* Get index sets for faces, edges and vertices from graph */ 1885 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1886 /* free unneeded index sets */ 1887 if (!pcbddc->use_vertices) { 1888 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1889 } 1890 if (!pcbddc->use_edges) { 1891 for (i=0;i<n_ISForEdges;i++) { 1892 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1893 } 1894 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1895 n_ISForEdges = 0; 1896 } 1897 if (!pcbddc->use_faces) { 1898 for (i=0;i<n_ISForFaces;i++) { 1899 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1900 } 1901 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1902 n_ISForFaces = 0; 1903 } 1904 1905 #if defined(PETSC_USE_DEBUG) 1906 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1907 Also use_change_of_basis should be consistent among processors */ 1908 if (pcbddc->NullSpace) { 1909 PetscBool tbool[2],gbool[2]; 1910 1911 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1912 pcbddc->use_change_of_basis = PETSC_TRUE; 1913 if (!ISForEdges) { 1914 pcbddc->use_change_on_faces = PETSC_TRUE; 1915 } 1916 } 1917 tbool[0] = pcbddc->use_change_of_basis; 1918 tbool[1] = pcbddc->use_change_on_faces; 1919 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1920 pcbddc->use_change_of_basis = gbool[0]; 1921 pcbddc->use_change_on_faces = gbool[1]; 1922 } 1923 #endif 1924 1925 /* check if near null space is attached to global mat */ 1926 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1927 if (nearnullsp) { 1928 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1929 /* remove any stored info */ 1930 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1931 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1932 /* store information for BDDC solver reuse */ 1933 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1934 pcbddc->onearnullspace = nearnullsp; 1935 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1936 for (i=0;i<nnsp_size;i++) { 1937 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1938 } 1939 } else { /* if near null space is not provided BDDC uses constants by default */ 1940 nnsp_size = 0; 1941 nnsp_has_cnst = PETSC_TRUE; 1942 } 1943 /* get max number of constraints on a single cc */ 1944 max_constraints = nnsp_size; 1945 if (nnsp_has_cnst) max_constraints++; 1946 1947 /* 1948 Evaluate maximum storage size needed by the procedure 1949 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 1950 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 1951 There can be multiple constraints per connected component 1952 */ 1953 n_vertices = 0; 1954 if (ISForVertices) { 1955 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1956 } 1957 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 1958 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 1959 1960 total_counts = n_ISForFaces+n_ISForEdges; 1961 total_counts *= max_constraints; 1962 total_counts += n_vertices; 1963 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1964 1965 total_counts = 0; 1966 max_size_of_constraint = 0; 1967 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1968 IS used_is; 1969 if (i<n_ISForEdges) { 1970 used_is = ISForEdges[i]; 1971 } else { 1972 used_is = ISForFaces[i-n_ISForEdges]; 1973 } 1974 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 1975 total_counts += j; 1976 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1977 } 1978 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 1979 1980 /* get local part of global near null space vectors */ 1981 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1982 for (k=0;k<nnsp_size;k++) { 1983 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1984 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1985 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 } 1987 1988 /* whether or not to skip lapack calls */ 1989 skip_lapack = PETSC_TRUE; 1990 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1991 1992 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1993 if (!skip_lapack) { 1994 PetscScalar temp_work; 1995 1996 #if defined(PETSC_MISSING_LAPACK_GESVD) 1997 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1998 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1999 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2000 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2001 #if defined(PETSC_USE_COMPLEX) 2002 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2003 #endif 2004 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2005 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2006 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2007 lwork = -1; 2008 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2009 #if !defined(PETSC_USE_COMPLEX) 2010 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2011 #else 2012 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2013 #endif 2014 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2015 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2016 #else /* on missing GESVD */ 2017 /* SVD */ 2018 PetscInt max_n,min_n; 2019 max_n = max_size_of_constraint; 2020 min_n = max_constraints; 2021 if (max_size_of_constraint < max_constraints) { 2022 min_n = max_size_of_constraint; 2023 max_n = max_constraints; 2024 } 2025 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2026 #if defined(PETSC_USE_COMPLEX) 2027 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2028 #endif 2029 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2030 lwork = -1; 2031 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2033 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2035 #if !defined(PETSC_USE_COMPLEX) 2036 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2037 #else 2038 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2039 #endif 2040 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2041 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2042 #endif /* on missing GESVD */ 2043 /* Allocate optimal workspace */ 2044 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2045 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2046 } 2047 /* Now we can loop on constraining sets */ 2048 total_counts = 0; 2049 constraints_idxs_ptr[0] = 0; 2050 constraints_data_ptr[0] = 0; 2051 /* vertices */ 2052 if (n_vertices) { 2053 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2054 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2055 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2056 for (i=0;i<n_vertices;i++) { 2057 constraints_n[total_counts] = 1; 2058 constraints_data[total_counts] = 1.0; 2059 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2060 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2061 total_counts++; 2062 } 2063 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2064 PetscBool used_vertex; 2065 for (i=0;i<n_vertices;i++) { 2066 used_vertex = PETSC_FALSE; 2067 k = 0; 2068 while (!used_vertex && k<nnsp_size) { 2069 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2070 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2071 constraints_n[total_counts] = 1; 2072 constraints_idxs[total_counts] = is_indices[i]; 2073 constraints_data[total_counts] = 1.0; 2074 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2075 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2076 total_counts++; 2077 used_vertex = PETSC_TRUE; 2078 } 2079 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2080 k++; 2081 } 2082 } 2083 } 2084 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2085 n_vertices = total_counts; 2086 } 2087 2088 /* edges and faces */ 2089 total_counts_cc = total_counts; 2090 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2091 IS used_is; 2092 PetscBool idxs_copied = PETSC_FALSE; 2093 2094 if (ncc<n_ISForEdges) { 2095 used_is = ISForEdges[ncc]; 2096 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2097 } else { 2098 used_is = ISForFaces[ncc-n_ISForEdges]; 2099 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2100 } 2101 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2102 2103 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2104 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2105 /* change of basis should not be performed on local periodic nodes */ 2106 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2107 if (nnsp_has_cnst) { 2108 PetscScalar quad_value; 2109 2110 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2111 idxs_copied = PETSC_TRUE; 2112 2113 if (!pcbddc->use_nnsp_true) { 2114 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2115 } else { 2116 quad_value = 1.0; 2117 } 2118 for (j=0;j<size_of_constraint;j++) { 2119 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2120 } 2121 temp_constraints++; 2122 total_counts++; 2123 } 2124 for (k=0;k<nnsp_size;k++) { 2125 PetscReal real_value; 2126 PetscScalar *ptr_to_data; 2127 2128 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2129 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2130 for (j=0;j<size_of_constraint;j++) { 2131 ptr_to_data[j] = array[is_indices[j]]; 2132 } 2133 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2134 /* check if array is null on the connected component */ 2135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2136 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2137 if (real_value > 0.0) { /* keep indices and values */ 2138 temp_constraints++; 2139 total_counts++; 2140 if (!idxs_copied) { 2141 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2142 idxs_copied = PETSC_TRUE; 2143 } 2144 } 2145 } 2146 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2147 valid_constraints = temp_constraints; 2148 if (!pcbddc->use_nnsp_true && temp_constraints) { 2149 if (temp_constraints == 1) { /* just normalize the constraint */ 2150 PetscScalar norm,*ptr_to_data; 2151 2152 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2153 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2155 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2156 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2157 } else { /* perform SVD */ 2158 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2159 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2160 2161 #if defined(PETSC_MISSING_LAPACK_GESVD) 2162 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2163 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2164 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2165 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2166 from that computed using LAPACKgesvd 2167 -> This is due to a different computation of eigenvectors in LAPACKheev 2168 -> The quality of the POD-computed basis will be the same */ 2169 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2170 /* Store upper triangular part of correlation matrix */ 2171 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2172 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2173 for (j=0;j<temp_constraints;j++) { 2174 for (k=0;k<j+1;k++) { 2175 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2176 } 2177 } 2178 /* compute eigenvalues and eigenvectors of correlation matrix */ 2179 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2180 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2181 #if !defined(PETSC_USE_COMPLEX) 2182 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2183 #else 2184 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2185 #endif 2186 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2187 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2188 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2189 j = 0; 2190 while (j < temp_constraints && singular_vals[j] < tol) j++; 2191 total_counts = total_counts-j; 2192 valid_constraints = temp_constraints-j; 2193 /* scale and copy POD basis into used quadrature memory */ 2194 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2195 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2196 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2197 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2198 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2199 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2200 if (j<temp_constraints) { 2201 PetscInt ii; 2202 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2203 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2204 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2205 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2206 for (k=0;k<temp_constraints-j;k++) { 2207 for (ii=0;ii<size_of_constraint;ii++) { 2208 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2209 } 2210 } 2211 } 2212 #else /* on missing GESVD */ 2213 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2214 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2216 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2217 #if !defined(PETSC_USE_COMPLEX) 2218 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2219 #else 2220 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2221 #endif 2222 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2223 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2224 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2225 k = temp_constraints; 2226 if (k > size_of_constraint) k = size_of_constraint; 2227 j = 0; 2228 while (j < k && singular_vals[k-j-1] < tol) j++; 2229 valid_constraints = k-j; 2230 total_counts = total_counts-temp_constraints+valid_constraints; 2231 #endif /* on missing GESVD */ 2232 } 2233 } 2234 /* update pointers information */ 2235 if (valid_constraints) { 2236 constraints_n[total_counts_cc] = valid_constraints; 2237 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2238 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2239 total_counts_cc++; 2240 /* set change_of_basis flag */ 2241 if (boolforchange) { 2242 for (j=valid_constraints;j>0;j--) { 2243 PetscBTSet(change_basis,total_counts-j); 2244 } 2245 } 2246 } 2247 } 2248 /* free workspace */ 2249 if (!skip_lapack) { 2250 ierr = PetscFree(work);CHKERRQ(ierr); 2251 #if defined(PETSC_USE_COMPLEX) 2252 ierr = PetscFree(rwork);CHKERRQ(ierr); 2253 #endif 2254 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2255 #if defined(PETSC_MISSING_LAPACK_GESVD) 2256 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2257 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2258 #endif 2259 } 2260 for (k=0;k<nnsp_size;k++) { 2261 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2262 } 2263 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2264 /* free index sets of faces, edges and vertices */ 2265 for (i=0;i<n_ISForFaces;i++) { 2266 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2267 } 2268 if (n_ISForFaces) { 2269 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2270 } 2271 for (i=0;i<n_ISForEdges;i++) { 2272 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2273 } 2274 if (n_ISForEdges) { 2275 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2276 } 2277 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2278 } else { 2279 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2280 2281 total_counts = 0; 2282 n_vertices = 0; 2283 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2284 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2285 } 2286 max_constraints = 0; 2287 total_counts_cc = 0; 2288 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2289 total_counts += pcbddc->adaptive_constraints_n[i]; 2290 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2291 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2292 } 2293 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2294 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2295 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2296 constraints_data = pcbddc->adaptive_constraints_data; 2297 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2298 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2299 total_counts_cc = 0; 2300 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2301 if (pcbddc->adaptive_constraints_n[i]) { 2302 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2303 } 2304 } 2305 #if 0 2306 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2307 for (i=0;i<total_counts_cc;i++) { 2308 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2309 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2310 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2311 printf(" %d",constraints_idxs[j]); 2312 } 2313 printf("\n"); 2314 printf("number of cc: %d\n",constraints_n[i]); 2315 } 2316 for (i=0;i<n_vertices;i++) { 2317 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2318 } 2319 for (i=0;i<sub_schurs->n_subs;i++) { 2320 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2321 } 2322 #endif 2323 2324 max_size_of_constraint = 0; 2325 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2326 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2327 /* Change of basis */ 2328 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2329 if (pcbddc->use_change_of_basis) { 2330 PetscInt cum = n_vertices; 2331 for (i=0;i<sub_schurs->n_subs;i++) { 2332 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2333 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2334 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2335 } 2336 } 2337 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2338 } 2339 } 2340 } 2341 pcbddc->local_primal_size = total_counts; 2342 pcbddc->local_primal_size_cc = total_counts_cc; 2343 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2344 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2345 2346 /* map constraints_idxs in boundary numbering */ 2347 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2348 if (i != constraints_idxs_ptr[total_counts_cc]) { 2349 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2350 } 2351 2352 /* Create constraint matrix */ 2353 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2354 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2355 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2356 2357 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2358 /* determine if a QR strategy is needed for change of basis */ 2359 qr_needed = PETSC_FALSE; 2360 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2361 total_primal_vertices=0; 2362 for (i=0;i<total_counts_cc;i++) { 2363 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2364 if (size_of_constraint == 1) { 2365 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2366 } else if (PetscBTLookup(change_basis,i)) { 2367 for (k=0;k<constraints_n[i];k++) { 2368 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2369 } 2370 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2371 PetscBTSet(qr_needed_idx,i); 2372 qr_needed = PETSC_TRUE; 2373 } 2374 } 2375 } 2376 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2377 pcbddc->n_vertices = total_primal_vertices; 2378 /* permute indices in order to have a sorted set of vertices */ 2379 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2380 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2381 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2382 2383 /* nonzero structure of constraint matrix */ 2384 /* and get reference dof for local constraints */ 2385 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2386 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2387 2388 j = total_primal_vertices; 2389 total_counts = total_primal_vertices; 2390 for (i=n_vertices;i<total_counts_cc;i++) { 2391 if (!PetscBTLookup(change_basis,i)) { 2392 pcbddc->local_primal_ref_node[i] = constraints_idxs[constraints_idxs_ptr[i]]; 2393 pcbddc->local_primal_ref_mult[i] = constraints_n[i]; 2394 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2395 for (k=0;k<constraints_n[i];k++) { 2396 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2397 nnz[j+k] = size_of_constraint; 2398 } 2399 j += constraints_n[i]; 2400 } 2401 } 2402 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2403 ierr = PetscFree(nnz);CHKERRQ(ierr); 2404 2405 /* set values in constraint matrix */ 2406 for (i=0;i<total_primal_vertices;i++) { 2407 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2408 } 2409 total_counts = total_primal_vertices; 2410 for (i=n_vertices;i<total_counts_cc;i++) { 2411 if (!PetscBTLookup(change_basis,i)) { 2412 PetscInt *cols; 2413 2414 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2415 cols = constraints_idxs+constraints_idxs_ptr[i]; 2416 for (k=0;k<constraints_n[i];k++) { 2417 PetscInt row = total_counts+k; 2418 PetscScalar *vals; 2419 2420 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2421 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2422 } 2423 total_counts += constraints_n[i]; 2424 } 2425 } 2426 /* assembling */ 2427 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2428 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2429 2430 /* 2431 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2432 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2433 */ 2434 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2435 if (pcbddc->use_change_of_basis) { 2436 /* dual and primal dofs on a single cc */ 2437 PetscInt dual_dofs,primal_dofs; 2438 /* working stuff for GEQRF */ 2439 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2440 PetscBLASInt lqr_work; 2441 /* working stuff for UNGQR */ 2442 PetscScalar *gqr_work,lgqr_work_t; 2443 PetscBLASInt lgqr_work; 2444 /* working stuff for TRTRS */ 2445 PetscScalar *trs_rhs; 2446 PetscBLASInt Blas_NRHS; 2447 /* pointers for values insertion into change of basis matrix */ 2448 PetscInt *start_rows,*start_cols; 2449 PetscScalar *start_vals; 2450 /* working stuff for values insertion */ 2451 PetscBT is_primal; 2452 PetscInt *aux_primal_numbering_B; 2453 /* matrix sizes */ 2454 PetscInt global_size,local_size; 2455 /* temporary change of basis */ 2456 Mat localChangeOfBasisMatrix; 2457 /* extra space for debugging */ 2458 PetscScalar *dbg_work; 2459 2460 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2461 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2462 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2463 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2464 /* nonzeros for local mat */ 2465 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2466 for (i=0;i<pcis->n;i++) nnz[i]=1; 2467 for (i=n_vertices;i<total_counts_cc;i++) { 2468 if (PetscBTLookup(change_basis,i)) { 2469 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2470 if (PetscBTLookup(qr_needed_idx,i)) { 2471 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2472 } else { 2473 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2474 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2475 } 2476 } 2477 } 2478 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2479 ierr = PetscFree(nnz);CHKERRQ(ierr); 2480 /* Set initial identity in the matrix */ 2481 for (i=0;i<pcis->n;i++) { 2482 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2483 } 2484 2485 if (pcbddc->dbg_flag) { 2486 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2488 } 2489 2490 2491 /* Now we loop on the constraints which need a change of basis */ 2492 /* 2493 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2494 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2495 2496 Basic blocks of change of basis matrix T computed by 2497 2498 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2499 2500 | 1 0 ... 0 s_1/S | 2501 | 0 1 ... 0 s_2/S | 2502 | ... | 2503 | 0 ... 1 s_{n-1}/S | 2504 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2505 2506 with S = \sum_{i=1}^n s_i^2 2507 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2508 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2509 2510 - QR decomposition of constraints otherwise 2511 */ 2512 if (qr_needed) { 2513 /* space to store Q */ 2514 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2515 /* first we issue queries for optimal work */ 2516 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2517 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2518 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2519 lqr_work = -1; 2520 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2521 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2522 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2523 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2524 lgqr_work = -1; 2525 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2526 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2529 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2530 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2531 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2532 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2533 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2534 /* array to store scaling factors for reflectors */ 2535 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2536 /* array to store rhs and solution of triangular solver */ 2537 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2538 /* allocating workspace for check */ 2539 if (pcbddc->dbg_flag) { 2540 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2541 } 2542 } 2543 /* array to store whether a node is primal or not */ 2544 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2545 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2546 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2547 if (i != total_primal_vertices) { 2548 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2549 } 2550 for (i=0;i<total_primal_vertices;i++) { 2551 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2552 } 2553 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2554 2555 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2556 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2557 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2558 if (PetscBTLookup(change_basis,total_counts)) { 2559 /* get constraint info */ 2560 primal_dofs = constraints_n[total_counts]; 2561 dual_dofs = size_of_constraint-primal_dofs; 2562 2563 if (pcbddc->dbg_flag) { 2564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2565 } 2566 2567 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2568 2569 /* copy quadrature constraints for change of basis check */ 2570 if (pcbddc->dbg_flag) { 2571 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2572 } 2573 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2574 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2575 2576 /* compute QR decomposition of constraints */ 2577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2580 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2581 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2582 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2583 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2584 2585 /* explictly compute R^-T */ 2586 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2587 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2588 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2589 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2593 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2594 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2595 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2596 2597 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2602 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2603 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2604 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2605 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2606 2607 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2608 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2609 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2610 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2611 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2612 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2615 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2616 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2617 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2618 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2619 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2620 2621 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2622 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2623 /* insert cols for primal dofs */ 2624 for (j=0;j<primal_dofs;j++) { 2625 start_vals = &qr_basis[j*size_of_constraint]; 2626 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2627 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2628 } 2629 /* insert cols for dual dofs */ 2630 for (j=0,k=0;j<dual_dofs;k++) { 2631 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2632 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2633 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2634 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2635 j++; 2636 } 2637 } 2638 2639 /* check change of basis */ 2640 if (pcbddc->dbg_flag) { 2641 PetscInt ii,jj; 2642 PetscBool valid_qr=PETSC_TRUE; 2643 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2644 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2645 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2646 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2647 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2648 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2649 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2650 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2651 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2652 for (jj=0;jj<size_of_constraint;jj++) { 2653 for (ii=0;ii<primal_dofs;ii++) { 2654 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2655 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2656 } 2657 } 2658 if (!valid_qr) { 2659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2660 for (jj=0;jj<size_of_constraint;jj++) { 2661 for (ii=0;ii<primal_dofs;ii++) { 2662 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2663 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2664 } 2665 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2666 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2667 } 2668 } 2669 } 2670 } else { 2671 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2672 } 2673 } 2674 } else { /* simple transformation block */ 2675 PetscInt row,col; 2676 PetscScalar val,norm; 2677 2678 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2679 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2680 for (j=0;j<size_of_constraint;j++) { 2681 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2682 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2683 if (!PetscBTLookup(is_primal,row_B)) { 2684 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2685 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2686 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2687 } else { 2688 for (k=0;k<size_of_constraint;k++) { 2689 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2690 if (row != col) { 2691 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2692 } else { 2693 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2694 } 2695 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2696 } 2697 } 2698 } 2699 if (pcbddc->dbg_flag) { 2700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2701 } 2702 } 2703 } else { 2704 if (pcbddc->dbg_flag) { 2705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2706 } 2707 } 2708 } 2709 2710 /* free workspace */ 2711 if (qr_needed) { 2712 if (pcbddc->dbg_flag) { 2713 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2714 } 2715 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2716 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2717 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2718 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2719 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2720 } 2721 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2722 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2724 2725 /* assembling of global change of variable */ 2726 { 2727 Mat tmat; 2728 PetscInt bs; 2729 2730 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2731 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2732 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2733 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2734 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2735 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2736 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2737 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2738 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2739 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2740 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2741 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2742 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2743 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2744 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2745 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2746 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2747 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2748 } 2749 /* check */ 2750 if (pcbddc->dbg_flag) { 2751 PetscReal error; 2752 Vec x,x_change; 2753 2754 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2755 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2756 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2757 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2758 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2759 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2760 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2761 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2762 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2763 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2764 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2765 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2766 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2768 ierr = VecDestroy(&x);CHKERRQ(ierr); 2769 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2770 } 2771 2772 /* adapt sub_schurs computed (if any) */ 2773 if (pcbddc->use_deluxe_scaling) { 2774 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2775 if (sub_schurs->S_Ej_all) { 2776 Mat S_new,tmat; 2777 IS is_all_N; 2778 2779 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2780 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2781 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2782 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2783 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2784 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2785 sub_schurs->S_Ej_all = S_new; 2786 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2787 if (sub_schurs->sum_S_Ej_all) { 2788 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2789 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2790 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2791 sub_schurs->sum_S_Ej_all = S_new; 2792 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2793 } 2794 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2795 } 2796 } 2797 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2798 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2799 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2800 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2801 } 2802 2803 /* set up change of basis context */ 2804 if (pcbddc->ChangeOfBasisMatrix) { 2805 PCBDDCChange_ctx change_ctx; 2806 2807 if (!pcbddc->new_global_mat) { 2808 PetscInt global_size,local_size; 2809 2810 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2811 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2812 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2813 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2814 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2815 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2816 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2817 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2818 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2819 } else { 2820 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2821 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2822 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2823 } 2824 if (!pcbddc->user_ChangeOfBasisMatrix) { 2825 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2826 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2827 } else { 2828 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2829 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2830 } 2831 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2832 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2833 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2835 } 2836 2837 /* check if a new primal space has been introduced */ 2838 pcbddc->new_primal_space_local = PETSC_TRUE; 2839 if (olocal_primal_size == pcbddc->local_primal_size) { 2840 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2841 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2842 if (!pcbddc->new_primal_space_local) { 2843 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2844 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2845 } 2846 } 2847 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2848 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2849 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2850 2851 /* flush dbg viewer */ 2852 if (pcbddc->dbg_flag) { 2853 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2854 } 2855 2856 /* free workspace */ 2857 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2858 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2859 if (!pcbddc->adaptive_selection) { 2860 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2861 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2862 } else { 2863 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2864 pcbddc->adaptive_constraints_idxs_ptr, 2865 pcbddc->adaptive_constraints_data_ptr, 2866 pcbddc->adaptive_constraints_idxs, 2867 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2868 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2869 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2870 } 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2876 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2877 { 2878 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2879 PC_IS *pcis = (PC_IS*)pc->data; 2880 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2881 PetscInt ierr,i,vertex_size,N; 2882 PetscViewer viewer=pcbddc->dbg_viewer; 2883 2884 PetscFunctionBegin; 2885 /* Reset previously computed graph */ 2886 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2887 /* Init local Graph struct */ 2888 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2889 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2890 2891 /* Check validity of the csr graph passed in by the user */ 2892 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2893 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2894 } 2895 2896 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2897 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2898 PetscInt *xadj,*adjncy; 2899 PetscInt nvtxs; 2900 PetscBool flg_row=PETSC_FALSE; 2901 2902 if (pcbddc->use_local_adj) { 2903 2904 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2905 if (flg_row) { 2906 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2907 pcbddc->computed_rowadj = PETSC_TRUE; 2908 } 2909 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2910 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2911 IS is_dummy; 2912 ISLocalToGlobalMapping l2gmap_dummy; 2913 PetscInt j,sum; 2914 PetscInt *cxadj,*cadjncy; 2915 const PetscInt *idxs; 2916 PCBDDCGraph graph; 2917 PetscBT is_on_boundary; 2918 2919 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2920 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2921 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2922 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2923 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 2924 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2925 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2926 if (flg_row) { 2927 graph->xadj = xadj; 2928 graph->adjncy = adjncy; 2929 } 2930 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2931 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2932 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2933 2934 if (pcbddc->dbg_flag) { 2935 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 2936 for (i=0;i<graph->ncc;i++) { 2937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2938 } 2939 } 2940 2941 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2942 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2943 for (i=0;i<pcis->n_B;i++) { 2944 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2945 } 2946 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2947 2948 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2949 sum = 0; 2950 for (i=0;i<graph->ncc;i++) { 2951 PetscInt sizecc = 0; 2952 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2953 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2954 sizecc++; 2955 } 2956 } 2957 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2958 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2959 cxadj[graph->queue[j]] = sizecc; 2960 } 2961 } 2962 sum += sizecc*sizecc; 2963 } 2964 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2965 sum = 0; 2966 for (i=0;i<pcis->n;i++) { 2967 PetscInt temp = cxadj[i]; 2968 cxadj[i] = sum; 2969 sum += temp; 2970 } 2971 cxadj[pcis->n] = sum; 2972 for (i=0;i<graph->ncc;i++) { 2973 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2974 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2975 PetscInt k,sizecc = 0; 2976 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2977 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2978 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2979 sizecc++; 2980 } 2981 } 2982 } 2983 } 2984 } 2985 if (sum) { 2986 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2987 } else { 2988 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2989 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2990 } 2991 graph->xadj = 0; 2992 graph->adjncy = 0; 2993 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2994 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2995 } 2996 } 2997 if (pcbddc->dbg_flag) { 2998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2999 } 3000 3001 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3002 vertex_size = 1; 3003 if (pcbddc->user_provided_isfordofs) { 3004 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3005 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3006 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3007 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3008 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3009 } 3010 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3011 pcbddc->n_ISForDofs = 0; 3012 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3013 } 3014 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3015 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3016 } else { 3017 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3018 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3019 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3020 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3021 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3022 } 3023 } 3024 } 3025 3026 /* Setup of Graph */ 3027 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3028 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3029 } 3030 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3031 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3032 } 3033 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3034 3035 /* Graph's connected components analysis */ 3036 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3037 3038 /* print some info to stdout */ 3039 if (pcbddc->dbg_flag) { 3040 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3041 } 3042 3043 /* mark topography has done */ 3044 pcbddc->recompute_topography = PETSC_FALSE; 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "PCBDDCSubsetNumbering" 3050 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3051 { 3052 Vec local_vec,global_vec; 3053 IS seqis,paris; 3054 VecScatter scatter_ctx; 3055 PetscScalar *array; 3056 PetscInt *temp_global_dofs; 3057 PetscScalar globalsum; 3058 PetscInt i,j,s; 3059 PetscInt nlocals,first_index,old_index,max_local,max_global; 3060 PetscMPIInt rank_prec_comm,size_prec_comm; 3061 PetscInt *dof_sizes,*dof_displs; 3062 PetscBool first_found; 3063 PetscErrorCode ierr; 3064 3065 PetscFunctionBegin; 3066 /* mpi buffers */ 3067 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3068 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3069 j = ( !rank_prec_comm ? size_prec_comm : 0); 3070 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3071 /* get maximum size of subset */ 3072 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3073 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3074 max_local = 0; 3075 for (i=0;i<n_local_dofs;i++) { 3076 if (max_local < temp_global_dofs[i] ) { 3077 max_local = temp_global_dofs[i]; 3078 } 3079 } 3080 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3081 max_global++; 3082 max_local = 0; 3083 for (i=0;i<n_local_dofs;i++) { 3084 if (max_local < local_dofs[i] ) { 3085 max_local = local_dofs[i]; 3086 } 3087 } 3088 max_local++; 3089 /* allocate workspace */ 3090 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3091 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3092 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3093 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3094 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3095 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3096 /* create scatter */ 3097 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3098 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3099 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3100 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3101 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3102 /* init array */ 3103 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3104 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3105 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3106 if (local_dofs_mult) { 3107 for (i=0;i<n_local_dofs;i++) { 3108 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3109 } 3110 } else { 3111 for (i=0;i<n_local_dofs;i++) { 3112 array[local_dofs[i]]=1.0; 3113 } 3114 } 3115 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3116 /* scatter into global vec and get total number of global dofs */ 3117 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3118 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3119 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3120 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3121 /* Fill global_vec with cumulative function for global numbering */ 3122 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3123 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3124 nlocals = 0; 3125 first_index = -1; 3126 first_found = PETSC_FALSE; 3127 for (i=0;i<s;i++) { 3128 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3129 first_found = PETSC_TRUE; 3130 first_index = i; 3131 } 3132 nlocals += (PetscInt)PetscRealPart(array[i]); 3133 } 3134 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3135 if (!rank_prec_comm) { 3136 dof_displs[0]=0; 3137 for (i=1;i<size_prec_comm;i++) { 3138 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3139 } 3140 } 3141 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3142 if (first_found) { 3143 array[first_index] += (PetscScalar)nlocals; 3144 old_index = first_index; 3145 for (i=first_index+1;i<s;i++) { 3146 if (PetscRealPart(array[i]) > 0.1) { 3147 array[i] += array[old_index]; 3148 old_index = i; 3149 } 3150 } 3151 } 3152 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3153 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3154 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3155 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3156 /* get global ordering of local dofs */ 3157 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3158 if (local_dofs_mult) { 3159 for (i=0;i<n_local_dofs;i++) { 3160 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3161 } 3162 } else { 3163 for (i=0;i<n_local_dofs;i++) { 3164 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3165 } 3166 } 3167 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3168 /* free workspace */ 3169 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3170 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3171 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3172 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3173 /* return pointer to global ordering of local dofs */ 3174 *global_numbering_subset = temp_global_dofs; 3175 PetscFunctionReturn(0); 3176 } 3177 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3180 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3181 { 3182 PetscInt i,j; 3183 PetscScalar *alphas; 3184 PetscErrorCode ierr; 3185 3186 PetscFunctionBegin; 3187 /* this implements stabilized Gram-Schmidt */ 3188 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3189 for (i=0;i<n;i++) { 3190 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3191 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3192 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3193 } 3194 ierr = PetscFree(alphas);CHKERRQ(ierr); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatISGetSubassemblingPattern" 3200 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3201 { 3202 Mat subdomain_adj; 3203 IS new_ranks,ranks_send_to; 3204 MatPartitioning partitioner; 3205 Mat_IS *matis; 3206 PetscInt n_neighs,*neighs,*n_shared,**shared; 3207 PetscInt prank; 3208 PetscMPIInt size,rank,color; 3209 PetscInt *xadj,*adjncy,*oldranks; 3210 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3211 PetscInt i,local_size,threshold=0; 3212 PetscErrorCode ierr; 3213 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3214 PetscSubcomm subcomm; 3215 3216 PetscFunctionBegin; 3217 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3218 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3219 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3220 3221 /* Get info on mapping */ 3222 matis = (Mat_IS*)(mat->data); 3223 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3224 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3225 3226 /* build local CSR graph of subdomains' connectivity */ 3227 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3228 xadj[0] = 0; 3229 xadj[1] = PetscMax(n_neighs-1,0); 3230 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3231 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3232 3233 if (threshold) { 3234 PetscInt xadj_count = 0; 3235 for (i=1;i<n_neighs;i++) { 3236 if (n_shared[i] > threshold) { 3237 adjncy[xadj_count] = neighs[i]; 3238 adjncy_wgt[xadj_count] = n_shared[i]; 3239 xadj_count++; 3240 } 3241 } 3242 xadj[1] = xadj_count; 3243 } else { 3244 if (xadj[1]) { 3245 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3246 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3247 } 3248 } 3249 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3250 if (use_square) { 3251 for (i=0;i<xadj[1];i++) { 3252 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3253 } 3254 } 3255 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3256 3257 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3258 3259 /* 3260 Restrict work on active processes only. 3261 */ 3262 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3263 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3264 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3265 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3266 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3267 if (color) { 3268 ierr = PetscFree(xadj);CHKERRQ(ierr); 3269 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3270 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3271 } else { 3272 PetscInt coarsening_ratio; 3273 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3274 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3275 prank = rank; 3276 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3277 /* 3278 for (i=0;i<size;i++) { 3279 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3280 } 3281 */ 3282 for (i=0;i<xadj[1];i++) { 3283 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3284 } 3285 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3286 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3287 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3288 3289 /* Partition */ 3290 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3291 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3292 if (use_vwgt) { 3293 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3294 v_wgt[0] = local_size; 3295 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3296 } 3297 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3298 coarsening_ratio = size/n_subdomains; 3299 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3300 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3301 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3302 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3303 3304 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3305 if (contiguous) { 3306 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3307 } else { 3308 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3309 } 3310 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3311 /* clean up */ 3312 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3313 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3314 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3315 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3316 } 3317 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3318 3319 /* assemble parallel IS for sends */ 3320 i = 1; 3321 if (color) i=0; 3322 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3323 3324 /* get back IS */ 3325 *is_sends = ranks_send_to; 3326 PetscFunctionReturn(0); 3327 } 3328 3329 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3330 3331 #undef __FUNCT__ 3332 #define __FUNCT__ "MatISSubassemble" 3333 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3334 { 3335 Mat local_mat; 3336 Mat_IS *matis; 3337 IS is_sends_internal; 3338 PetscInt rows,cols,new_local_rows; 3339 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3340 PetscBool ismatis,isdense,newisdense,destroy_mat; 3341 ISLocalToGlobalMapping l2gmap; 3342 PetscInt* l2gmap_indices; 3343 const PetscInt* is_indices; 3344 MatType new_local_type; 3345 /* buffers */ 3346 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3347 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3348 PetscInt *recv_buffer_idxs_local; 3349 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3350 /* MPI */ 3351 MPI_Comm comm,comm_n; 3352 PetscSubcomm subcomm; 3353 PetscMPIInt n_sends,n_recvs,commsize; 3354 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3355 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3356 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3357 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3358 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3359 PetscErrorCode ierr; 3360 3361 PetscFunctionBegin; 3362 /* TODO: add missing checks */ 3363 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3364 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3365 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3366 PetscValidLogicalCollectiveInt(mat,nis,7); 3367 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3368 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3369 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3370 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3371 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3372 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3373 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3374 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3375 PetscInt mrows,mcols,mnrows,mncols; 3376 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3377 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3378 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3379 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3380 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3381 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3382 } 3383 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3384 PetscValidLogicalCollectiveInt(mat,bs,0); 3385 /* prepare IS for sending if not provided */ 3386 if (!is_sends) { 3387 PetscBool pcontig = PETSC_TRUE; 3388 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3389 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3390 } else { 3391 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3392 is_sends_internal = is_sends; 3393 } 3394 3395 /* get pointer of MATIS data */ 3396 matis = (Mat_IS*)mat->data; 3397 3398 /* get comm */ 3399 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3400 3401 /* compute number of sends */ 3402 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3403 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3404 3405 /* compute number of receives */ 3406 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3407 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3408 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3409 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3410 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3411 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3412 ierr = PetscFree(iflags);CHKERRQ(ierr); 3413 3414 /* restrict comm if requested */ 3415 subcomm = 0; 3416 destroy_mat = PETSC_FALSE; 3417 if (restrict_comm) { 3418 PetscMPIInt color,subcommsize; 3419 3420 color = 0; 3421 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3422 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3423 subcommsize = commsize - subcommsize; 3424 /* check if reuse has been requested */ 3425 if (reuse == MAT_REUSE_MATRIX) { 3426 if (*mat_n) { 3427 PetscMPIInt subcommsize2; 3428 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3429 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3430 comm_n = PetscObjectComm((PetscObject)*mat_n); 3431 } else { 3432 comm_n = PETSC_COMM_SELF; 3433 } 3434 } else { /* MAT_INITIAL_MATRIX */ 3435 PetscMPIInt rank; 3436 3437 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3438 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3439 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3440 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3441 comm_n = PetscSubcommChild(subcomm); 3442 } 3443 /* flag to destroy *mat_n if not significative */ 3444 if (color) destroy_mat = PETSC_TRUE; 3445 } else { 3446 comm_n = comm; 3447 } 3448 3449 /* prepare send/receive buffers */ 3450 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3451 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3452 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3453 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3454 if (nis) { 3455 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3456 } 3457 3458 /* Get data from local matrices */ 3459 if (!isdense) { 3460 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3461 /* TODO: See below some guidelines on how to prepare the local buffers */ 3462 /* 3463 send_buffer_vals should contain the raw values of the local matrix 3464 send_buffer_idxs should contain: 3465 - MatType_PRIVATE type 3466 - PetscInt size_of_l2gmap 3467 - PetscInt global_row_indices[size_of_l2gmap] 3468 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3469 */ 3470 } else { 3471 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3472 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3473 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3474 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3475 send_buffer_idxs[1] = i; 3476 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3477 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3478 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3479 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3480 for (i=0;i<n_sends;i++) { 3481 ilengths_vals[is_indices[i]] = len*len; 3482 ilengths_idxs[is_indices[i]] = len+2; 3483 } 3484 } 3485 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3486 /* additional is (if any) */ 3487 if (nis) { 3488 PetscMPIInt psum; 3489 PetscInt j; 3490 for (j=0,psum=0;j<nis;j++) { 3491 PetscInt plen; 3492 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3493 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3494 psum += len+1; /* indices + lenght */ 3495 } 3496 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3497 for (j=0,psum=0;j<nis;j++) { 3498 PetscInt plen; 3499 const PetscInt *is_array_idxs; 3500 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3501 send_buffer_idxs_is[psum] = plen; 3502 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3503 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3504 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3505 psum += plen+1; /* indices + lenght */ 3506 } 3507 for (i=0;i<n_sends;i++) { 3508 ilengths_idxs_is[is_indices[i]] = psum; 3509 } 3510 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3511 } 3512 3513 buf_size_idxs = 0; 3514 buf_size_vals = 0; 3515 buf_size_idxs_is = 0; 3516 for (i=0;i<n_recvs;i++) { 3517 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3518 buf_size_vals += (PetscInt)olengths_vals[i]; 3519 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3520 } 3521 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3522 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3523 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3524 3525 /* get new tags for clean communications */ 3526 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3527 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3528 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3529 3530 /* allocate for requests */ 3531 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3532 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3533 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3534 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3536 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3537 3538 /* communications */ 3539 ptr_idxs = recv_buffer_idxs; 3540 ptr_vals = recv_buffer_vals; 3541 ptr_idxs_is = recv_buffer_idxs_is; 3542 for (i=0;i<n_recvs;i++) { 3543 source_dest = onodes[i]; 3544 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3545 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3546 ptr_idxs += olengths_idxs[i]; 3547 ptr_vals += olengths_vals[i]; 3548 if (nis) { 3549 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3550 ptr_idxs_is += olengths_idxs_is[i]; 3551 } 3552 } 3553 for (i=0;i<n_sends;i++) { 3554 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3555 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3556 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3557 if (nis) { 3558 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3559 } 3560 } 3561 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3562 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3563 3564 /* assemble new l2g map */ 3565 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3566 ptr_idxs = recv_buffer_idxs; 3567 new_local_rows = 0; 3568 for (i=0;i<n_recvs;i++) { 3569 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3570 ptr_idxs += olengths_idxs[i]; 3571 } 3572 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3573 ptr_idxs = recv_buffer_idxs; 3574 new_local_rows = 0; 3575 for (i=0;i<n_recvs;i++) { 3576 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3577 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3578 ptr_idxs += olengths_idxs[i]; 3579 } 3580 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3581 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3582 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3583 3584 /* infer new local matrix type from received local matrices type */ 3585 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3586 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3587 if (n_recvs) { 3588 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3589 ptr_idxs = recv_buffer_idxs; 3590 for (i=0;i<n_recvs;i++) { 3591 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3592 new_local_type_private = MATAIJ_PRIVATE; 3593 break; 3594 } 3595 ptr_idxs += olengths_idxs[i]; 3596 } 3597 switch (new_local_type_private) { 3598 case MATDENSE_PRIVATE: 3599 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3600 new_local_type = MATSEQAIJ; 3601 bs = 1; 3602 } else { /* if I receive only 1 dense matrix */ 3603 new_local_type = MATSEQDENSE; 3604 bs = 1; 3605 } 3606 break; 3607 case MATAIJ_PRIVATE: 3608 new_local_type = MATSEQAIJ; 3609 bs = 1; 3610 break; 3611 case MATBAIJ_PRIVATE: 3612 new_local_type = MATSEQBAIJ; 3613 break; 3614 case MATSBAIJ_PRIVATE: 3615 new_local_type = MATSEQSBAIJ; 3616 break; 3617 default: 3618 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3619 break; 3620 } 3621 } else { /* by default, new_local_type is seqdense */ 3622 new_local_type = MATSEQDENSE; 3623 bs = 1; 3624 } 3625 3626 /* create MATIS object if needed */ 3627 if (reuse == MAT_INITIAL_MATRIX) { 3628 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3629 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3630 } else { 3631 /* it also destroys the local matrices */ 3632 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3633 } 3634 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3635 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3636 3637 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3638 3639 /* Global to local map of received indices */ 3640 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3641 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3642 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3643 3644 /* restore attributes -> type of incoming data and its size */ 3645 buf_size_idxs = 0; 3646 for (i=0;i<n_recvs;i++) { 3647 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3648 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3649 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3650 } 3651 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3652 3653 /* set preallocation */ 3654 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3655 if (!newisdense) { 3656 PetscInt *new_local_nnz=0; 3657 3658 ptr_vals = recv_buffer_vals; 3659 ptr_idxs = recv_buffer_idxs_local; 3660 if (n_recvs) { 3661 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3662 } 3663 for (i=0;i<n_recvs;i++) { 3664 PetscInt j; 3665 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3666 for (j=0;j<*(ptr_idxs+1);j++) { 3667 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3668 } 3669 } else { 3670 /* TODO */ 3671 } 3672 ptr_idxs += olengths_idxs[i]; 3673 } 3674 if (new_local_nnz) { 3675 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3676 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3677 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3678 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3679 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3680 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3681 } else { 3682 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3683 } 3684 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3685 } else { 3686 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3687 } 3688 3689 /* set values */ 3690 ptr_vals = recv_buffer_vals; 3691 ptr_idxs = recv_buffer_idxs_local; 3692 for (i=0;i<n_recvs;i++) { 3693 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3694 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3695 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3696 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3697 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3699 } else { 3700 /* TODO */ 3701 } 3702 ptr_idxs += olengths_idxs[i]; 3703 ptr_vals += olengths_vals[i]; 3704 } 3705 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3706 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3710 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3711 3712 #if 0 3713 if (!restrict_comm) { /* check */ 3714 Vec lvec,rvec; 3715 PetscReal infty_error; 3716 3717 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3718 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3719 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3720 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3721 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3722 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3723 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3724 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3725 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3726 } 3727 #endif 3728 3729 /* assemble new additional is (if any) */ 3730 if (nis) { 3731 PetscInt **temp_idxs,*count_is,j,psum; 3732 3733 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3734 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3735 ptr_idxs = recv_buffer_idxs_is; 3736 psum = 0; 3737 for (i=0;i<n_recvs;i++) { 3738 for (j=0;j<nis;j++) { 3739 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3740 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3741 psum += plen; 3742 ptr_idxs += plen+1; /* shift pointer to received data */ 3743 } 3744 } 3745 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3746 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3747 for (i=1;i<nis;i++) { 3748 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3749 } 3750 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3751 ptr_idxs = recv_buffer_idxs_is; 3752 for (i=0;i<n_recvs;i++) { 3753 for (j=0;j<nis;j++) { 3754 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3755 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3756 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3757 ptr_idxs += plen+1; /* shift pointer to received data */ 3758 } 3759 } 3760 for (i=0;i<nis;i++) { 3761 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3762 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3763 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3764 } 3765 ierr = PetscFree(count_is);CHKERRQ(ierr); 3766 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3767 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3768 } 3769 /* free workspace */ 3770 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3771 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3772 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3773 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3774 if (isdense) { 3775 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3776 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3777 } else { 3778 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3779 } 3780 if (nis) { 3781 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3782 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3783 } 3784 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3785 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3786 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3787 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3788 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3789 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3790 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3791 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3792 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3793 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3794 ierr = PetscFree(onodes);CHKERRQ(ierr); 3795 if (nis) { 3796 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3797 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3798 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3799 } 3800 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3801 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3802 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3803 for (i=0;i<nis;i++) { 3804 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3805 } 3806 } 3807 PetscFunctionReturn(0); 3808 } 3809 3810 /* temporary hack into ksp private data structure */ 3811 #include <petsc/private/kspimpl.h> 3812 3813 #undef __FUNCT__ 3814 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3815 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3816 { 3817 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3818 PC_IS *pcis = (PC_IS*)pc->data; 3819 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3820 MatNullSpace CoarseNullSpace=NULL; 3821 ISLocalToGlobalMapping coarse_islg; 3822 IS coarse_is,*isarray; 3823 PetscInt i,im_active=-1,active_procs=-1; 3824 PetscInt nis,nisdofs,nisneu; 3825 PC pc_temp; 3826 PCType coarse_pc_type; 3827 KSPType coarse_ksp_type; 3828 PetscBool multilevel_requested,multilevel_allowed; 3829 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3830 Mat t_coarse_mat_is; 3831 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3832 PetscMPIInt all_procs; 3833 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3834 PetscBool compute_vecs = PETSC_FALSE; 3835 PetscScalar *array; 3836 PetscErrorCode ierr; 3837 3838 PetscFunctionBegin; 3839 /* Assign global numbering to coarse dofs */ 3840 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3841 PetscInt ocoarse_size; 3842 compute_vecs = PETSC_TRUE; 3843 ocoarse_size = pcbddc->coarse_size; 3844 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3845 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3846 /* see if we can avoid some work */ 3847 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3848 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3849 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3850 coarse_reuse = PETSC_FALSE; 3851 } else { /* we can safely reuse already computed coarse matrix */ 3852 coarse_reuse = PETSC_TRUE; 3853 } 3854 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3855 coarse_reuse = PETSC_FALSE; 3856 } 3857 /* reset any subassembling information */ 3858 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3859 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3860 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3861 coarse_reuse = PETSC_TRUE; 3862 } 3863 3864 /* count "active" (i.e. with positive local size) and "void" processes */ 3865 im_active = !!(pcis->n); 3866 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3867 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3868 void_procs = all_procs-active_procs; 3869 csin_type_simple = PETSC_TRUE; 3870 redist = PETSC_FALSE; 3871 if (pcbddc->current_level && void_procs) { 3872 csin_ml = PETSC_TRUE; 3873 ncoarse_ml = void_procs; 3874 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3875 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3876 csin_ds = PETSC_TRUE; 3877 ncoarse_ds = pcbddc->redistribute_coarse; 3878 redist = PETSC_TRUE; 3879 } else { 3880 csin_ds = PETSC_TRUE; 3881 ncoarse_ds = active_procs; 3882 redist = PETSC_TRUE; 3883 } 3884 } else { 3885 csin_ml = PETSC_FALSE; 3886 ncoarse_ml = all_procs; 3887 if (void_procs) { 3888 csin_ds = PETSC_TRUE; 3889 ncoarse_ds = void_procs; 3890 csin_type_simple = PETSC_FALSE; 3891 } else { 3892 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3893 csin_ds = PETSC_TRUE; 3894 ncoarse_ds = pcbddc->redistribute_coarse; 3895 redist = PETSC_TRUE; 3896 } else { 3897 csin_ds = PETSC_FALSE; 3898 ncoarse_ds = all_procs; 3899 } 3900 } 3901 } 3902 3903 /* 3904 test if we can go multilevel: three conditions must be satisfied: 3905 - we have not exceeded the number of levels requested 3906 - we can actually subassemble the active processes 3907 - we can find a suitable number of MPI processes where we can place the subassembled problem 3908 */ 3909 multilevel_allowed = PETSC_FALSE; 3910 multilevel_requested = PETSC_FALSE; 3911 if (pcbddc->current_level < pcbddc->max_levels) { 3912 multilevel_requested = PETSC_TRUE; 3913 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3914 multilevel_allowed = PETSC_FALSE; 3915 } else { 3916 multilevel_allowed = PETSC_TRUE; 3917 } 3918 } 3919 /* determine number of process partecipating to coarse solver */ 3920 if (multilevel_allowed) { 3921 ncoarse = ncoarse_ml; 3922 csin = csin_ml; 3923 redist = PETSC_FALSE; 3924 } else { 3925 ncoarse = ncoarse_ds; 3926 csin = csin_ds; 3927 } 3928 3929 /* creates temporary l2gmap and IS for coarse indexes */ 3930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3931 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3932 3933 /* creates temporary MATIS object for coarse matrix */ 3934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3935 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3936 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3937 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3938 #if 0 3939 { 3940 PetscViewer viewer; 3941 char filename[256]; 3942 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3943 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3944 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3945 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3946 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3947 } 3948 #endif 3949 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3950 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3951 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3952 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3953 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3954 3955 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3956 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3957 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3958 const PetscInt *idxs; 3959 ISLocalToGlobalMapping tmap; 3960 3961 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3962 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3963 /* allocate space for temporary storage */ 3964 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3965 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3966 /* allocate for IS array */ 3967 nisdofs = pcbddc->n_ISForDofsLocal; 3968 nisneu = !!pcbddc->NeumannBoundariesLocal; 3969 nis = nisdofs + nisneu; 3970 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3971 /* dofs splitting */ 3972 for (i=0;i<nisdofs;i++) { 3973 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3974 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3975 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3976 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3977 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3978 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3979 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3980 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3981 } 3982 /* neumann boundaries */ 3983 if (pcbddc->NeumannBoundariesLocal) { 3984 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3985 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3986 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3987 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3988 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3989 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3990 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3991 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3992 } 3993 /* free memory */ 3994 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3995 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3996 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3997 } else { 3998 nis = 0; 3999 nisdofs = 0; 4000 nisneu = 0; 4001 isarray = NULL; 4002 } 4003 /* destroy no longer needed map */ 4004 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4005 4006 /* restrict on coarse candidates (if needed) */ 4007 coarse_mat_is = NULL; 4008 if (csin) { 4009 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4010 if (redist) { 4011 PetscMPIInt rank; 4012 PetscInt spc,n_spc_p1,dest[1],destsize; 4013 4014 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4015 spc = active_procs/ncoarse; 4016 n_spc_p1 = active_procs%ncoarse; 4017 if (im_active) { 4018 destsize = 1; 4019 if (rank > n_spc_p1*(spc+1)-1) { 4020 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4021 } else { 4022 dest[0] = rank/(spc+1); 4023 } 4024 } else { 4025 destsize = 0; 4026 } 4027 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4028 } else if (csin_type_simple) { 4029 PetscMPIInt rank; 4030 PetscInt issize,isidx; 4031 4032 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4033 if (im_active) { 4034 issize = 1; 4035 isidx = (PetscInt)rank; 4036 } else { 4037 issize = 0; 4038 isidx = -1; 4039 } 4040 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4041 } else { /* get a suitable subassembling pattern from MATIS code */ 4042 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4043 } 4044 4045 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4046 if (!redist || ncoarse <= void_procs) { 4047 PetscInt ncoarse_cand,tissize,*nisindices; 4048 PetscInt *coarse_candidates; 4049 const PetscInt* tisindices; 4050 4051 /* get coarse candidates' ranks in pc communicator */ 4052 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4053 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4054 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4055 if (!coarse_candidates[i]) { 4056 coarse_candidates[ncoarse_cand++]=i; 4057 } 4058 } 4059 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4060 4061 4062 if (pcbddc->dbg_flag) { 4063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4065 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4067 for (i=0;i<ncoarse_cand;i++) { 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4069 } 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4071 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4072 } 4073 /* shift the pattern on coarse candidates */ 4074 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4075 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4076 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4077 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4078 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4079 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4080 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4081 } 4082 if (pcbddc->dbg_flag) { 4083 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4085 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4086 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4087 } 4088 } 4089 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4090 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4091 } else { 4092 if (pcbddc->dbg_flag) { 4093 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4096 } 4097 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4098 coarse_mat_is = t_coarse_mat_is; 4099 } 4100 4101 /* create local to global scatters for coarse problem */ 4102 if (compute_vecs) { 4103 PetscInt lrows; 4104 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4105 if (coarse_mat_is) { 4106 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4107 } else { 4108 lrows = 0; 4109 } 4110 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4111 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4112 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4113 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4114 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4115 } 4116 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4117 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4118 4119 /* set defaults for coarse KSP and PC */ 4120 if (multilevel_allowed) { 4121 coarse_ksp_type = KSPRICHARDSON; 4122 coarse_pc_type = PCBDDC; 4123 } else { 4124 coarse_ksp_type = KSPPREONLY; 4125 coarse_pc_type = PCREDUNDANT; 4126 } 4127 4128 /* print some info if requested */ 4129 if (pcbddc->dbg_flag) { 4130 if (!multilevel_allowed) { 4131 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4132 if (multilevel_requested) { 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4134 } else if (pcbddc->max_levels) { 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4136 } 4137 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4138 } 4139 } 4140 4141 /* create the coarse KSP object only once with defaults */ 4142 if (coarse_mat_is) { 4143 MatReuse coarse_mat_reuse; 4144 PetscViewer dbg_viewer = NULL; 4145 if (pcbddc->dbg_flag) { 4146 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4147 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4148 } 4149 if (!pcbddc->coarse_ksp) { 4150 char prefix[256],str_level[16]; 4151 size_t len; 4152 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4153 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4154 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4155 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4156 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4157 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4158 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4159 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4160 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4161 /* prefix */ 4162 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4163 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4164 if (!pcbddc->current_level) { 4165 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4166 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4167 } else { 4168 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4169 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4170 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4171 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4172 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4173 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4174 } 4175 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4176 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4177 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4178 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4179 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4180 /* allow user customization */ 4181 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4182 } 4183 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4184 if (nisdofs) { 4185 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4186 for (i=0;i<nisdofs;i++) { 4187 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4188 } 4189 } 4190 if (nisneu) { 4191 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4192 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4193 } 4194 4195 /* get some info after set from options */ 4196 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4197 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4198 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4199 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4200 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4201 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4202 isbddc = PETSC_FALSE; 4203 } 4204 if (isredundant) { 4205 KSP inner_ksp; 4206 PC inner_pc; 4207 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4208 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4209 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4210 } 4211 4212 /* assemble coarse matrix */ 4213 if (coarse_reuse) { 4214 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4215 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4216 coarse_mat_reuse = MAT_REUSE_MATRIX; 4217 } else { 4218 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4219 } 4220 if (isbddc || isnn) { 4221 if (pcbddc->coarsening_ratio > 1) { 4222 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4223 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4224 if (pcbddc->dbg_flag) { 4225 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4226 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4227 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4228 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4229 } 4230 } 4231 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4232 } else { 4233 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4234 coarse_mat = coarse_mat_is; 4235 } 4236 } else { 4237 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4238 } 4239 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4240 4241 /* propagate symmetry info to coarse matrix */ 4242 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4243 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4244 4245 /* set operators */ 4246 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4247 if (pcbddc->dbg_flag) { 4248 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4249 } 4250 } else { /* processes non partecipating to coarse solver (if any) */ 4251 coarse_mat = 0; 4252 } 4253 ierr = PetscFree(isarray);CHKERRQ(ierr); 4254 #if 0 4255 { 4256 PetscViewer viewer; 4257 char filename[256]; 4258 sprintf(filename,"coarse_mat.m"); 4259 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4260 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4261 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4262 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4263 } 4264 #endif 4265 4266 /* Compute coarse null space (special handling by BDDC only) */ 4267 if (pcbddc->NullSpace) { 4268 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4269 } 4270 4271 if (pcbddc->coarse_ksp) { 4272 Vec crhs,csol; 4273 PetscBool ispreonly; 4274 if (CoarseNullSpace) { 4275 if (isbddc) { 4276 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4277 } else { 4278 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4279 } 4280 } 4281 /* setup coarse ksp */ 4282 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4283 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4284 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4285 /* hack */ 4286 if (!csol) { 4287 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4288 } 4289 if (!crhs) { 4290 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4291 } 4292 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4293 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4294 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4295 KSP check_ksp; 4296 KSPType check_ksp_type; 4297 PC check_pc; 4298 Vec check_vec,coarse_vec; 4299 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4300 PetscInt its; 4301 PetscBool compute_eigs; 4302 PetscReal *eigs_r,*eigs_c; 4303 PetscInt neigs; 4304 const char *prefix; 4305 4306 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4307 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4308 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4309 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4310 if (ispreonly) { 4311 check_ksp_type = KSPPREONLY; 4312 compute_eigs = PETSC_FALSE; 4313 } else { 4314 check_ksp_type = KSPGMRES; 4315 compute_eigs = PETSC_TRUE; 4316 } 4317 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4318 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4319 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4320 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4321 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4322 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4323 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4324 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4325 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4326 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4327 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4328 /* create random vec */ 4329 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4330 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4331 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4332 if (CoarseNullSpace) { 4333 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4334 } 4335 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4336 /* solve coarse problem */ 4337 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4338 if (CoarseNullSpace) { 4339 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4340 } 4341 /* set eigenvalue estimation if preonly has not been requested */ 4342 if (compute_eigs) { 4343 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4344 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4345 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4346 lambda_max = eigs_r[neigs-1]; 4347 lambda_min = eigs_r[0]; 4348 if (pcbddc->use_coarse_estimates) { 4349 if (lambda_max>lambda_min) { 4350 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4351 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4352 } 4353 } 4354 } 4355 4356 /* check coarse problem residual error */ 4357 if (pcbddc->dbg_flag) { 4358 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4359 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4360 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4361 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4362 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4363 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4364 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4365 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4366 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4367 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4368 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4369 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4370 if (compute_eigs) { 4371 PetscReal lambda_max_s,lambda_min_s; 4372 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4373 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4374 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4375 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4376 for (i=0;i<neigs;i++) { 4377 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4378 } 4379 } 4380 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4381 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4382 } 4383 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4384 if (compute_eigs) { 4385 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4386 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4387 } 4388 } 4389 } 4390 /* print additional info */ 4391 if (pcbddc->dbg_flag) { 4392 /* waits until all processes reaches this point */ 4393 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4394 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4395 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4396 } 4397 4398 /* free memory */ 4399 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4400 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4401 PetscFunctionReturn(0); 4402 } 4403 4404 #undef __FUNCT__ 4405 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4406 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4407 { 4408 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4409 PC_IS* pcis = (PC_IS*)pc->data; 4410 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4411 PetscInt i,local_size,coarse_size=0; 4412 PetscInt *local_primal_indices=NULL; 4413 PetscInt *t_local_primal_indices=NULL; 4414 PetscErrorCode ierr; 4415 4416 PetscFunctionBegin; 4417 /* Compute global number of coarse dofs */ 4418 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4419 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4420 } 4421 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4422 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4423 local_size = 0; 4424 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4425 PetscInt j; 4426 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4427 } 4428 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4429 if (local_size != pcbddc->local_primal_size) { 4430 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4431 } 4432 4433 /* check numbering */ 4434 if (pcbddc->dbg_flag) { 4435 PetscScalar coarsesum,*array; 4436 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4437 4438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4439 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4440 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4441 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4442 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4443 for (i=0;i<pcbddc->local_primal_size;i++) { 4444 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4445 } 4446 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4447 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4448 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4449 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4450 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4451 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4453 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4454 for (i=0;i<pcis->n;i++) { 4455 if (array[i] == 1.0) { 4456 set_error = PETSC_TRUE; 4457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4458 } 4459 } 4460 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4461 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4462 for (i=0;i<pcis->n;i++) { 4463 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4464 } 4465 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4466 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4467 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4468 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4469 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4470 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4471 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4472 PetscInt *gidxs; 4473 4474 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4475 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4476 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4479 for (i=0;i<pcbddc->local_primal_size;i++) { 4480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4481 } 4482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4484 } 4485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4486 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4487 } 4488 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4489 /* get back data */ 4490 *coarse_size_n = coarse_size; 4491 *local_primal_indices_n = local_primal_indices; 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "PCBDDCGlobalToLocal" 4497 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4498 { 4499 IS localis_t; 4500 PetscInt i,lsize,*idxs,n; 4501 PetscScalar *vals; 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 /* get indices in local ordering exploiting local to global map */ 4506 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4507 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4508 for (i=0;i<lsize;i++) vals[i] = 1.0; 4509 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4510 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4511 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4512 if (idxs) { /* multilevel guard */ 4513 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4514 } 4515 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4516 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4517 ierr = PetscFree(vals);CHKERRQ(ierr); 4518 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4519 /* now compute set in local ordering */ 4520 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4521 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4522 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4523 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4524 for (i=0,lsize=0;i<n;i++) { 4525 if (PetscRealPart(vals[i]) > 0.5) { 4526 lsize++; 4527 } 4528 } 4529 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4530 for (i=0,lsize=0;i<n;i++) { 4531 if (PetscRealPart(vals[i]) > 0.5) { 4532 idxs[lsize++] = i; 4533 } 4534 } 4535 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4536 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4537 *localis = localis_t; 4538 PetscFunctionReturn(0); 4539 } 4540 4541 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4542 #undef __FUNCT__ 4543 #define __FUNCT__ "PCBDDCMatMult_Private" 4544 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4545 { 4546 PCBDDCChange_ctx change_ctx; 4547 PetscErrorCode ierr; 4548 4549 PetscFunctionBegin; 4550 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4551 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4552 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4553 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4554 PetscFunctionReturn(0); 4555 } 4556 4557 #undef __FUNCT__ 4558 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4559 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4560 { 4561 PCBDDCChange_ctx change_ctx; 4562 PetscErrorCode ierr; 4563 4564 PetscFunctionBegin; 4565 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4566 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4567 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4568 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4569 PetscFunctionReturn(0); 4570 } 4571 4572 #undef __FUNCT__ 4573 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4574 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4575 { 4576 PC_IS *pcis=(PC_IS*)pc->data; 4577 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4578 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4579 Mat S_j; 4580 PetscInt *used_xadj,*used_adjncy; 4581 PetscBool free_used_adj; 4582 PetscErrorCode ierr; 4583 4584 PetscFunctionBegin; 4585 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4586 free_used_adj = PETSC_FALSE; 4587 if (pcbddc->sub_schurs_layers == -1) { 4588 used_xadj = NULL; 4589 used_adjncy = NULL; 4590 } else { 4591 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4592 used_xadj = pcbddc->mat_graph->xadj; 4593 used_adjncy = pcbddc->mat_graph->adjncy; 4594 } else if (pcbddc->computed_rowadj) { 4595 used_xadj = pcbddc->mat_graph->xadj; 4596 used_adjncy = pcbddc->mat_graph->adjncy; 4597 } else { 4598 PetscBool flg_row=PETSC_FALSE; 4599 const PetscInt *xadj,*adjncy; 4600 PetscInt nvtxs; 4601 4602 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4603 if (flg_row) { 4604 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4605 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4606 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4607 free_used_adj = PETSC_TRUE; 4608 } else { 4609 pcbddc->sub_schurs_layers = -1; 4610 used_xadj = NULL; 4611 used_adjncy = NULL; 4612 } 4613 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4614 } 4615 } 4616 4617 /* setup sub_schurs data */ 4618 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4619 if (!sub_schurs->use_mumps) { 4620 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4621 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4622 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4623 } else { 4624 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4625 } 4626 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4627 4628 /* free adjacency */ 4629 if (free_used_adj) { 4630 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4631 } 4632 PetscFunctionReturn(0); 4633 } 4634 4635 #undef __FUNCT__ 4636 #define __FUNCT__ "PCBDDCInitSubSchurs" 4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4638 { 4639 PC_IS *pcis=(PC_IS*)pc->data; 4640 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4641 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4642 PCBDDCGraph graph; 4643 PetscErrorCode ierr; 4644 4645 PetscFunctionBegin; 4646 /* attach interface graph for determining subsets */ 4647 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4648 IS verticesIS; 4649 4650 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4651 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4652 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4655 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4656 /* 4657 if (pcbddc->dbg_flag) { 4658 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 } 4660 */ 4661 } else { 4662 graph = pcbddc->mat_graph; 4663 } 4664 4665 /* sub_schurs init */ 4666 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4667 4668 /* free graph struct */ 4669 if (pcbddc->sub_schurs_rebuild) { 4670 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4671 } 4672 PetscFunctionReturn(0); 4673 } 4674