1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 10 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 IS zerodiag; 14 PetscInt nz; 15 PetscBool sorted; 16 PetscErrorCode ierr; 17 18 PetscFunctionBegin; 19 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 20 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 21 pcbddc->benign_original_mat = pcbddc->local_mat; 22 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 23 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 24 if (!sorted) { 25 ierr = ISSort(zerodiag);CHKERRQ(ierr); 26 } 27 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 28 if (nz) { 29 IS zerodiagc; 30 PetscScalar *array; 31 const PetscInt *idxs,*idxsc; 32 PetscInt i,n,*nnz; 33 34 /* TODO: add check for shared dofs and raise error */ 35 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 36 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 37 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 38 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 39 /* local change of basis for pressures */ 40 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 41 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 42 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 43 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 44 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 45 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 46 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 47 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 48 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 49 ierr = PetscFree(nnz);CHKERRQ(ierr); 50 /* set identity on velocities */ 51 for (i=0;i<n-nz;i++) { 52 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* set change on pressures */ 55 for (i=0;i<nz-1;i++) { 56 PetscScalar vals[2]; 57 PetscInt cols[2]; 58 59 /* TODO: add quadrature */ 60 cols[0] = idxs[i]; 61 cols[1] = idxs[nz-1]; 62 vals[0] = 1.; 63 vals[1] = 1./nz; 64 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 65 } 66 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 67 for (i=0;i<nz-1;i++) array[i] = -1.; 68 array[nz-1] = 1./nz; 69 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 70 ierr = PetscFree(array);CHKERRQ(ierr); 71 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 72 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 73 /* TODO: need optimization? */ 74 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 75 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 76 /* store local and global idxs for p0 */ 77 pcbddc->benign_p0_lidx = idxs[nz-1]; 78 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 79 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 80 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 81 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 82 } else { /* this is unlikely to happen but, just in case, destroy the empty IS */ 83 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 84 } 85 *zerodiaglocal = zerodiag; 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 91 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 92 { 93 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 if (!pcbddc->benign_sf) { 98 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 99 if (pcbddc->benign_p0_gidx >= 0) { 100 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 101 } else { 102 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 103 } 104 } 105 if (get) { /* use SF to get values */ 106 PetscScalar *array; 107 108 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 109 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 110 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 111 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 112 } else { /* use VecSetValue */ 113 if (pcbddc->benign_p0_gidx >= 0) { 114 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 115 } 116 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 117 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 118 } 119 PetscFunctionReturn(0); 120 } 121 122 #undef __FUNCT__ 123 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 124 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 125 { 126 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 127 PetscErrorCode ierr; 128 129 PetscFunctionBegin; 130 /* TODO: add error checking 131 - avoid nested pop (or push) calls. 132 - cannot push before pop. 133 - cannot call this if pcbddc->local_mat is NULL 134 */ 135 if (pcbddc->benign_p0_lidx < 0) { 136 PetscFunctionReturn(0); 137 } 138 if (pop) { 139 const PetscInt *cB0_cols; 140 PetscInt cB0_ncol; 141 const PetscScalar *cB0_vals; 142 143 /* extract B_0 */ 144 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 145 pcbddc->B0_ncol = cB0_ncol; 146 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 147 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 148 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 149 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 150 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 151 /* remove rows and cols from local problem */ 152 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 153 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 154 } else { /* push */ 155 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 156 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 157 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 158 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 159 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 #undef __FUNCT__ 165 #define __FUNCT__ "PCBDDCAdaptiveSelection" 166 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 167 { 168 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 169 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 170 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 171 PetscBLASInt *B_iwork,*B_ifail; 172 PetscScalar *work,lwork; 173 PetscScalar *St,*S,*eigv; 174 PetscScalar *Sarray,*Starray; 175 PetscReal *eigs,thresh; 176 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 177 PetscBool allocated_S_St; 178 #if defined(PETSC_USE_COMPLEX) 179 PetscReal *rwork; 180 #endif 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 if (!sub_schurs->use_mumps) { 185 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 186 } 187 188 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 189 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 190 } 191 192 if (pcbddc->dbg_flag) { 193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 194 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 195 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 196 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 197 } 198 199 if (pcbddc->dbg_flag) { 200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 201 } 202 203 /* max size of subsets */ 204 mss = 0; 205 for (i=0;i<sub_schurs->n_subs;i++) { 206 PetscInt subset_size; 207 208 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 209 mss = PetscMax(mss,subset_size); 210 } 211 212 /* min/max and threshold */ 213 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 214 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 215 nmax = PetscMax(nmin,nmax); 216 allocated_S_St = PETSC_FALSE; 217 if (nmin) { 218 allocated_S_St = PETSC_TRUE; 219 } 220 221 /* allocate lapack workspace */ 222 cum = cum2 = 0; 223 maxneigs = 0; 224 for (i=0;i<sub_schurs->n_subs;i++) { 225 PetscInt n,subset_size; 226 227 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 228 n = PetscMin(subset_size,nmax); 229 cum += subset_size; 230 cum2 += subset_size*n; 231 maxneigs = PetscMax(maxneigs,n); 232 } 233 if (mss) { 234 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 235 PetscBLASInt B_itype = 1; 236 PetscBLASInt B_N = mss; 237 PetscReal zero = 0.0; 238 PetscReal eps = 0.0; /* dlamch? */ 239 240 B_lwork = -1; 241 S = NULL; 242 St = NULL; 243 eigs = NULL; 244 eigv = NULL; 245 B_iwork = NULL; 246 B_ifail = NULL; 247 #if defined(PETSC_USE_COMPLEX) 248 rwork = NULL; 249 #endif 250 thresh = 1.0; 251 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 252 #if defined(PETSC_USE_COMPLEX) 253 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 254 #else 255 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 256 #endif 257 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 258 ierr = PetscFPTrapPop();CHKERRQ(ierr); 259 } else { 260 /* TODO */ 261 } 262 } else { 263 lwork = 0; 264 } 265 266 nv = 0; 267 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 268 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 269 } 270 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 271 if (allocated_S_St) { 272 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 273 } 274 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 275 #if defined(PETSC_USE_COMPLEX) 276 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 277 #endif 278 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 279 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 280 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 281 nv+cum,&pcbddc->adaptive_constraints_idxs, 282 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 283 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 284 285 maxneigs = 0; 286 cum = cum2 = cumarray = 0; 287 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 288 pcbddc->adaptive_constraints_data_ptr[0] = 0; 289 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 290 const PetscInt *idxs; 291 292 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 293 for (cum=0;cum<nv;cum++) { 294 pcbddc->adaptive_constraints_n[cum] = 1; 295 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 296 pcbddc->adaptive_constraints_data[cum] = 1.0; 297 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 298 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 299 } 300 cum2 = cum; 301 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 302 } 303 304 if (mss) { /* multilevel */ 305 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 306 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 307 } 308 309 for (i=0;i<sub_schurs->n_subs;i++) { 310 311 const PetscInt *idxs; 312 PetscReal infty = PETSC_MAX_REAL; 313 PetscInt j,subset_size,eigs_start = 0; 314 PetscBLASInt B_N; 315 PetscBool same_data = PETSC_FALSE; 316 317 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 318 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 319 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 320 if (sub_schurs->is_hermitian) { 321 PetscInt j,k; 322 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 323 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 324 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 325 } 326 for (j=0;j<subset_size;j++) { 327 for (k=j;k<subset_size;k++) { 328 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 329 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 330 } 331 } 332 } else { 333 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 334 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 } else { 337 S = Sarray + cumarray; 338 St = Starray + cumarray; 339 } 340 341 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 342 /* see if we can save some work */ 343 if (sub_schurs->n_subs == 1) { 344 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 345 } 346 347 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 348 B_neigs = 0; 349 } else { 350 /* Threshold: this is an heuristic for edges */ 351 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 352 353 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 354 PetscBLASInt B_itype = 1; 355 PetscBLASInt B_IL, B_IU; 356 PetscReal eps = -1.0; /* dlamch? */ 357 PetscInt nmin_s; 358 359 if (pcbddc->dbg_flag) { 360 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 361 } 362 363 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 364 if (thresh > 1.+PETSC_SMALL) { 365 366 /* ask for eigenvalues larger than thresh */ 367 #if defined(PETSC_USE_COMPLEX) 368 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 369 #else 370 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 371 #endif 372 } else { 373 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 374 B_IL = 1; 375 #if defined(PETSC_USE_COMPLEX) 376 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 377 #else 378 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 379 #endif 380 } 381 ierr = PetscFPTrapPop();CHKERRQ(ierr); 382 if (B_ierr) { 383 if (B_ierr < 0 ) { 384 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 385 } else if (B_ierr <= B_N) { 386 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 387 } else { 388 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 389 } 390 } 391 392 if (B_neigs > nmax) { 393 if (pcbddc->dbg_flag) { 394 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 395 } 396 eigs_start = B_neigs -nmax; 397 B_neigs = nmax; 398 } 399 400 nmin_s = PetscMin(nmin,B_N); 401 if (B_neigs < nmin_s) { 402 PetscBLASInt B_neigs2; 403 404 B_IU = B_N - B_neigs; 405 B_IL = B_N - nmin_s + 1; 406 if (pcbddc->dbg_flag) { 407 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 408 } 409 if (sub_schurs->is_hermitian) { 410 PetscInt j; 411 for (j=0;j<subset_size;j++) { 412 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 413 } 414 for (j=0;j<subset_size;j++) { 415 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 416 } 417 } else { 418 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 419 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 420 } 421 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 422 #if defined(PETSC_USE_COMPLEX) 423 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 424 #else 425 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 426 #endif 427 ierr = PetscFPTrapPop();CHKERRQ(ierr); 428 B_neigs += B_neigs2; 429 } 430 if (B_ierr) { 431 if (B_ierr < 0 ) { 432 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 433 } else if (B_ierr <= B_N) { 434 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 435 } else { 436 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 437 } 438 } 439 if (pcbddc->dbg_flag) { 440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 441 for (j=0;j<B_neigs;j++) { 442 if (eigs[j] == 0.0) { 443 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 444 } else { 445 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 446 } 447 } 448 } 449 } else { 450 /* TODO */ 451 } 452 } 453 maxneigs = PetscMax(B_neigs,maxneigs); 454 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 455 if (B_neigs) { 456 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 457 458 if (pcbddc->dbg_flag > 1) { 459 PetscInt ii; 460 for (ii=0;ii<B_neigs;ii++) { 461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 462 for (j=0;j<B_N;j++) { 463 #if defined(PETSC_USE_COMPLEX) 464 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 465 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 466 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 467 #else 468 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 469 #endif 470 } 471 } 472 } 473 #if 0 474 for (j=0;j<B_neigs;j++) { 475 PetscBLASInt Blas_N,Blas_one = 1.0; 476 PetscScalar norm; 477 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 478 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 479 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 480 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 481 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 482 } else { 483 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 484 } 485 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 486 } 487 #endif 488 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 489 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 490 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 491 cum++; 492 } 493 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 494 /* shift for next computation */ 495 cumarray += subset_size*subset_size; 496 } 497 if (pcbddc->dbg_flag) { 498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 499 } 500 501 if (mss) { 502 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 503 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 504 /* destroy matrices (junk) */ 505 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 506 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 507 } 508 if (allocated_S_St) { 509 ierr = PetscFree2(S,St);CHKERRQ(ierr); 510 } 511 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 512 #if defined(PETSC_USE_COMPLEX) 513 ierr = PetscFree(rwork);CHKERRQ(ierr); 514 #endif 515 if (pcbddc->dbg_flag) { 516 PetscInt maxneigs_r; 517 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 519 } 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "PCBDDCSetUpSolvers" 525 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 526 { 527 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 528 PetscScalar *coarse_submat_vals; 529 PetscErrorCode ierr; 530 531 PetscFunctionBegin; 532 /* Setup local scatters R_to_B and (optionally) R_to_D */ 533 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 534 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 535 536 /* Setup local neumann solver ksp_R */ 537 /* PCBDDCSetUpLocalScatters should be called first! */ 538 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 539 540 /* Change global null space passed in by the user if change of basis has been requested */ 541 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 542 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 543 } 544 545 /* 546 Setup local correction and local part of coarse basis. 547 Gives back the dense local part of the coarse matrix in column major ordering 548 */ 549 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 550 551 /* Compute total number of coarse nodes and setup coarse solver */ 552 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 553 554 /* free */ 555 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 556 PetscFunctionReturn(0); 557 } 558 559 #undef __FUNCT__ 560 #define __FUNCT__ "PCBDDCResetCustomization" 561 PetscErrorCode PCBDDCResetCustomization(PC pc) 562 { 563 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 564 PetscErrorCode ierr; 565 566 PetscFunctionBegin; 567 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 568 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 569 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 570 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 571 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 572 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 573 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 574 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 575 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 576 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 577 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "PCBDDCResetTopography" 583 PetscErrorCode PCBDDCResetTopography(PC pc) 584 { 585 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 586 PetscErrorCode ierr; 587 588 PetscFunctionBegin; 589 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 590 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 591 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 592 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 593 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "PCBDDCResetSolvers" 599 PetscErrorCode PCBDDCResetSolvers(PC pc) 600 { 601 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 602 PetscScalar *array; 603 PetscErrorCode ierr; 604 605 PetscFunctionBegin; 606 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 607 if (pcbddc->coarse_phi_B) { 608 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 609 ierr = PetscFree(array);CHKERRQ(ierr); 610 } 611 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 613 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 614 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 615 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 616 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 617 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 618 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 620 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 621 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 622 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 623 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 624 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 625 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 626 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 627 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 628 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 629 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 630 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 631 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 632 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 633 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 634 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 635 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 636 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 637 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 638 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 639 PetscFunctionReturn(0); 640 } 641 642 #undef __FUNCT__ 643 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 644 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 645 { 646 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 647 PC_IS *pcis = (PC_IS*)pc->data; 648 VecType impVecType; 649 PetscInt n_constraints,n_R,old_size,n_benign; 650 PetscErrorCode ierr; 651 652 PetscFunctionBegin; 653 if (!pcbddc->ConstraintMatrix) { 654 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 655 } 656 /* get sizes */ 657 n_benign = 0; 658 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 659 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 660 n_R = pcis->n - pcbddc->n_vertices; 661 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 662 /* local work vectors (try to avoid unneeded work)*/ 663 /* R nodes */ 664 old_size = -1; 665 if (pcbddc->vec1_R) { 666 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 667 } 668 if (n_R != old_size) { 669 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 670 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 671 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 672 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 673 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 674 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 675 } 676 /* local primal dofs */ 677 old_size = -1; 678 if (pcbddc->vec1_P) { 679 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 680 } 681 if (pcbddc->local_primal_size != old_size) { 682 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 683 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 684 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 685 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 686 } 687 /* local explicit constraints */ 688 old_size = -1; 689 if (pcbddc->vec1_C) { 690 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 691 } 692 if (n_constraints && n_constraints != old_size) { 693 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 694 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 695 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 696 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "PCBDDCSetUpCorrection" 703 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 704 { 705 PetscErrorCode ierr; 706 /* pointers to pcis and pcbddc */ 707 PC_IS* pcis = (PC_IS*)pc->data; 708 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 709 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 710 /* submatrices of local problem */ 711 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 712 /* submatrices of benign trick */ 713 Mat B0_V = NULL; 714 /* submatrices of local coarse problem */ 715 Mat S_VV,S_CV,S_VC,S_CC; 716 /* working matrices */ 717 Mat C_CR; 718 /* additional working stuff */ 719 PC pc_R; 720 Mat F,B0 = NULL; 721 PetscBool isLU,isCHOL,isILU; 722 723 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 724 PetscScalar *work; 725 PetscInt *idx_V_B; 726 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 727 PetscInt i,n_R,n_D,n_B; 728 PetscBool unsymmetric_check; 729 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 730 MatType impMatType; 731 /* some shortcuts to scalars */ 732 PetscScalar one=1.0,m_one=-1.0; 733 734 PetscFunctionBegin; 735 n_benign = 0; 736 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 737 n_vertices = pcbddc->n_vertices; 738 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 739 /* Set Non-overlapping dimensions */ 740 n_B = pcis->n_B; 741 n_D = pcis->n - n_B; 742 n_R = pcis->n - n_vertices; 743 744 /* Set types for local objects needed by BDDC precondtioner */ 745 impMatType = MATSEQDENSE; 746 747 /* vertices in boundary numbering */ 748 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 749 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 750 if (i != n_vertices) { 751 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 752 } 753 754 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 755 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 756 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 757 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 759 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 760 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 761 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 763 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 764 765 unsymmetric_check = PETSC_FALSE; 766 /* allocate workspace */ 767 n = 0; 768 if (n_constraints) { 769 n += n_R*n_constraints; 770 } 771 if (n_vertices) { 772 n = PetscMax(2*n_R*n_vertices,n); 773 n = PetscMax((n_R+n_B)*n_vertices,n); 774 } 775 if (!pcbddc->symmetric_primal) { 776 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 777 unsymmetric_check = PETSC_TRUE; 778 } 779 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 780 781 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 782 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 783 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 784 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 785 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 786 if (isLU || isILU || isCHOL) { 787 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 788 } else if (sub_schurs->reuse_mumps) { 789 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 790 MatFactorType type; 791 792 F = reuse_mumps->F; 793 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 794 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 795 } else { 796 F = NULL; 797 } 798 799 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 800 if (n_constraints) { 801 Mat M1,M2,M3; 802 Mat auxmat; 803 IS is_aux; 804 PetscScalar *array,*array2; 805 806 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 807 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 808 809 /* Extract constraints on R nodes: C_{CR} */ 810 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 811 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 812 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 813 814 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 815 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 816 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 817 for (i=0;i<n_constraints;i++) { 818 const PetscScalar *row_cmat_values; 819 const PetscInt *row_cmat_indices; 820 PetscInt size_of_constraint,j; 821 822 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 823 for (j=0;j<size_of_constraint;j++) { 824 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 825 } 826 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 827 } 828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 829 if (F) { 830 Mat B; 831 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 833 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 834 ierr = MatDestroy(&B);CHKERRQ(ierr); 835 } else { 836 PetscScalar *marr; 837 838 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 839 for (i=0;i<n_constraints;i++) { 840 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 841 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 842 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 843 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 844 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 845 } 846 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 847 } 848 if (!pcbddc->switch_static) { 849 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 850 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 851 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 852 for (i=0;i<n_constraints;i++) { 853 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 854 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 855 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 858 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 859 } 860 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 861 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 862 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 863 } else { 864 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 865 pcbddc->local_auxmat2 = local_auxmat2_R; 866 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 867 } 868 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 869 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 870 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 871 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 872 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 873 if (isCHOL) { 874 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 875 } else { 876 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 877 } 878 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 879 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 880 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 881 ierr = MatDestroy(&M2);CHKERRQ(ierr); 882 ierr = MatDestroy(&M3);CHKERRQ(ierr); 883 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 884 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 885 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 886 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 887 ierr = MatDestroy(&M1);CHKERRQ(ierr); 888 } 889 /* Get submatrices from subdomain matrix */ 890 if (n_benign) { 891 IS dummy; 892 Mat B0_R; 893 PetscReal norm; 894 PetscInt ii[2]; 895 896 ii[0] = 0; 897 ii[1] = pcbddc->B0_ncol; 898 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 899 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 900 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 901 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 902 if (norm > PETSC_SMALL) { 903 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 904 } 905 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 906 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 907 } 908 909 if (n_vertices) { 910 IS is_aux; 911 912 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 913 IS tis; 914 915 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 916 ierr = ISSort(tis);CHKERRQ(ierr); 917 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 918 ierr = ISDestroy(&tis);CHKERRQ(ierr); 919 } else { 920 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 921 } 922 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 923 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 924 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 925 if (n_benign) { 926 IS dummy; 927 928 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 929 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 930 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 931 } 932 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 933 } 934 935 /* Matrix of coarse basis functions (local) */ 936 if (pcbddc->coarse_phi_B) { 937 PetscInt on_B,on_primal,on_D=n_D; 938 if (pcbddc->coarse_phi_D) { 939 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 940 } 941 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 942 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 943 PetscScalar *marray; 944 945 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 946 ierr = PetscFree(marray);CHKERRQ(ierr); 947 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 948 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 949 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 950 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 951 } 952 } 953 954 if (!pcbddc->coarse_phi_B) { 955 PetscScalar *marray; 956 957 n = n_B*pcbddc->local_primal_size; 958 if (pcbddc->switch_static || pcbddc->dbg_flag) { 959 n += n_D*pcbddc->local_primal_size; 960 } 961 if (!pcbddc->symmetric_primal) { 962 n *= 2; 963 } 964 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 966 n = n_B*pcbddc->local_primal_size; 967 if (pcbddc->switch_static || pcbddc->dbg_flag) { 968 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 969 n += n_D*pcbddc->local_primal_size; 970 } 971 if (!pcbddc->symmetric_primal) { 972 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 973 if (pcbddc->switch_static || pcbddc->dbg_flag) { 974 n = n_B*pcbddc->local_primal_size; 975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 976 } 977 } else { 978 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 979 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 980 if (pcbddc->switch_static || pcbddc->dbg_flag) { 981 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 982 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 983 } 984 } 985 } 986 987 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 988 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 989 const PetscInt *idxs; 990 991 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 992 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 993 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 994 } 995 996 /* vertices */ 997 if (n_vertices) { 998 999 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1000 1001 if (n_R) { 1002 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1003 PetscBLASInt B_N,B_one = 1; 1004 PetscScalar *x,*y; 1005 PetscBool isseqaij; 1006 1007 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1008 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1009 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1010 if (F) { /* TODO could be optimized for symmetric problems */ 1011 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1014 for (i=0;i<n_vertices;i++) { 1015 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1016 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1017 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1018 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1019 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1020 } 1021 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1022 } 1023 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1024 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1025 if (n_constraints) { 1026 Mat B; 1027 1028 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1029 for (i=0;i<n_vertices;i++) { 1030 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1031 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1035 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1036 } 1037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1038 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1039 ierr = MatDestroy(&B);CHKERRQ(ierr); 1040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1041 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1042 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1043 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1044 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1045 ierr = MatDestroy(&B);CHKERRQ(ierr); 1046 } 1047 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1048 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1049 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1050 } 1051 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1052 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1053 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1054 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1055 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1056 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1057 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1058 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1059 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1060 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1061 } else { 1062 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1063 } 1064 if (n_benign) { 1065 const PetscScalar *vals; 1066 const PetscInt *idxs; 1067 PetscInt n,j; 1068 1069 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1070 for (j=0;j<n;j++) { 1071 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1072 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1073 } 1074 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1075 } 1076 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1077 1078 /* coarse basis functions */ 1079 for (i=0;i<n_vertices;i++) { 1080 PetscScalar *y; 1081 1082 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1083 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1084 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1085 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 y[n_B*i+idx_V_B[i]] = 1.0; 1088 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1089 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1090 1091 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1092 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1093 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1094 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1097 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1098 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1099 } 1100 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1101 } 1102 /* if n_R == 0 the object is not destroyed */ 1103 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1104 } 1105 1106 if (n_constraints) { 1107 Mat B; 1108 1109 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1110 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1111 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1112 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1113 if (n_vertices) { 1114 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1115 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1116 } else { 1117 Mat S_VCt; 1118 1119 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1120 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1121 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1122 } 1123 } 1124 ierr = MatDestroy(&B);CHKERRQ(ierr); 1125 /* coarse basis functions */ 1126 for (i=0;i<n_constraints;i++) { 1127 PetscScalar *y; 1128 1129 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1130 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1131 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1132 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1133 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1135 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1136 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1137 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1138 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1139 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1142 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1143 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1144 } 1145 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1146 } 1147 } 1148 if (n_constraints) { 1149 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1150 } 1151 1152 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1153 1154 /* compute other basis functions for non-symmetric problems */ 1155 if (!pcbddc->symmetric_primal) { 1156 1157 if (n_constraints) { 1158 Mat S_CCT,B_C; 1159 1160 /* this is a lazy thing */ 1161 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1163 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1164 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1165 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1166 if (n_vertices) { 1167 Mat B_V,S_VCT; 1168 1169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1170 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1171 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1172 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1173 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1174 } 1175 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1176 } else { /* if there are no constraints, reset work */ 1177 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1178 } 1179 if (n_vertices && n_R) { 1180 Mat A_VRT; 1181 PetscScalar *marray; 1182 PetscBLASInt B_N,B_one = 1; 1183 1184 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1185 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1186 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1187 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1188 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1189 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1190 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1191 } 1192 1193 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1194 for (i=0;i<pcbddc->local_primal_size;i++) { 1195 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1196 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1197 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1198 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1199 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1200 } 1201 } else { 1202 for (i=0;i<pcbddc->local_primal_size;i++) { 1203 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1204 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1205 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1206 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1207 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1208 } 1209 } 1210 /* coarse basis functions */ 1211 for (i=0;i<pcbddc->local_primal_size;i++) { 1212 PetscScalar *y; 1213 1214 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1215 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1216 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1217 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1218 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1219 if (i<n_vertices) { 1220 y[n_B*i+idx_V_B[i]] = 1.0; 1221 } 1222 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1223 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1224 1225 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1226 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1227 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1228 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1230 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1231 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1232 } 1233 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1234 } 1235 } 1236 /* free memory */ 1237 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1238 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1239 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1240 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1241 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1242 ierr = PetscFree(work);CHKERRQ(ierr); 1243 if (n_vertices) { 1244 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1245 } 1246 if (n_constraints) { 1247 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1248 } 1249 /* Checking coarse_sub_mat and coarse basis functios */ 1250 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1251 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1252 if (pcbddc->dbg_flag) { 1253 Mat coarse_sub_mat; 1254 Mat AUXMAT,TM1,TM2,TM3,TM4; 1255 Mat coarse_phi_D,coarse_phi_B; 1256 Mat coarse_psi_D,coarse_psi_B; 1257 Mat A_II,A_BB,A_IB,A_BI; 1258 Mat C_B,CPHI; 1259 IS is_dummy; 1260 Vec mones; 1261 MatType checkmattype=MATSEQAIJ; 1262 PetscReal real_value; 1263 1264 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1265 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1266 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1267 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1268 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1269 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1270 if (unsymmetric_check) { 1271 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1272 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1273 } 1274 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1275 1276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1277 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1278 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1279 if (unsymmetric_check) { 1280 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1281 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1282 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1283 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1284 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1285 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1286 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1287 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1288 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1289 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1290 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1291 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1292 } else { 1293 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1294 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1295 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1296 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1297 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1298 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1299 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1300 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1301 } 1302 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1303 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1304 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1305 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1306 if (n_benign) { 1307 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1308 PetscScalar *data,*data2; 1309 1310 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1311 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1312 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1313 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1314 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1315 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1316 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1317 for (i=0;i<pcbddc->local_primal_size;i++) { 1318 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1319 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1320 } 1321 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1322 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1323 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1324 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1325 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1326 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1327 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1328 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1329 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1330 } 1331 #if 0 1332 { 1333 PetscViewer viewer; 1334 char filename[256]; 1335 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1336 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1337 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1338 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1339 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1340 } 1341 #endif 1342 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1343 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1344 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1346 1347 /* check constraints */ 1348 if (!n_benign) { 1349 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1350 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1351 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1352 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1353 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1354 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1355 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1356 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1357 if (unsymmetric_check) { 1358 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1359 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1360 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1361 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1363 } 1364 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1365 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1366 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1367 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1368 } 1369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1370 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1371 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1372 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1373 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1374 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1375 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1376 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1377 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1378 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1379 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1380 if (unsymmetric_check) { 1381 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1382 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1383 } 1384 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1385 } 1386 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1387 /* get back data */ 1388 *coarse_submat_vals_n = coarse_submat_vals; 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1394 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1395 { 1396 Mat *work_mat; 1397 IS isrow_s,iscol_s; 1398 PetscBool rsorted,csorted; 1399 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1400 PetscErrorCode ierr; 1401 1402 PetscFunctionBegin; 1403 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1404 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1405 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1406 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1407 1408 if (!rsorted) { 1409 const PetscInt *idxs; 1410 PetscInt *idxs_sorted,i; 1411 1412 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1413 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1414 for (i=0;i<rsize;i++) { 1415 idxs_perm_r[i] = i; 1416 } 1417 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1418 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1419 for (i=0;i<rsize;i++) { 1420 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1421 } 1422 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1423 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1424 } else { 1425 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1426 isrow_s = isrow; 1427 } 1428 1429 if (!csorted) { 1430 if (isrow == iscol) { 1431 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1432 iscol_s = isrow_s; 1433 } else { 1434 const PetscInt *idxs; 1435 PetscInt *idxs_sorted,i; 1436 1437 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1438 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1439 for (i=0;i<csize;i++) { 1440 idxs_perm_c[i] = i; 1441 } 1442 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1443 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1444 for (i=0;i<csize;i++) { 1445 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1446 } 1447 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1448 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1449 } 1450 } else { 1451 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1452 iscol_s = iscol; 1453 } 1454 1455 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1456 1457 if (!rsorted || !csorted) { 1458 Mat new_mat; 1459 IS is_perm_r,is_perm_c; 1460 1461 if (!rsorted) { 1462 PetscInt *idxs_r,i; 1463 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1464 for (i=0;i<rsize;i++) { 1465 idxs_r[idxs_perm_r[i]] = i; 1466 } 1467 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1468 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1469 } else { 1470 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1471 } 1472 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1473 1474 if (!csorted) { 1475 if (isrow_s == iscol_s) { 1476 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1477 is_perm_c = is_perm_r; 1478 } else { 1479 PetscInt *idxs_c,i; 1480 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1481 for (i=0;i<csize;i++) { 1482 idxs_c[idxs_perm_c[i]] = i; 1483 } 1484 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1485 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1486 } 1487 } else { 1488 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1489 } 1490 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1491 1492 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1493 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1494 work_mat[0] = new_mat; 1495 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1496 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1497 } 1498 1499 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1500 *B = work_mat[0]; 1501 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1502 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1503 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 #undef __FUNCT__ 1508 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1509 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1510 { 1511 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1512 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1513 Mat new_mat; 1514 IS is_local,is_global; 1515 PetscInt local_size; 1516 PetscBool isseqaij; 1517 PetscErrorCode ierr; 1518 1519 PetscFunctionBegin; 1520 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1521 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1522 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1523 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1524 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1525 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1526 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1527 1528 /* check */ 1529 if (pcbddc->dbg_flag) { 1530 Vec x,x_change; 1531 PetscReal error; 1532 1533 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1534 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1535 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1536 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1539 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1540 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1541 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1542 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1544 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1545 ierr = VecDestroy(&x);CHKERRQ(ierr); 1546 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1547 } 1548 1549 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1550 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1551 if (isseqaij) { 1552 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1553 } else { 1554 Mat work_mat; 1555 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1556 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1557 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1558 } 1559 if (matis->A->symmetric_set) { 1560 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1561 #if !defined(PETSC_USE_COMPLEX) 1562 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1563 #endif 1564 } 1565 /* 1566 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1567 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1568 */ 1569 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1575 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1576 { 1577 PC_IS* pcis = (PC_IS*)(pc->data); 1578 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1579 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1580 PetscInt *idx_R_local=NULL; 1581 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1582 PetscInt vbs,bs; 1583 PetscBT bitmask=NULL; 1584 PetscErrorCode ierr; 1585 1586 PetscFunctionBegin; 1587 /* 1588 No need to setup local scatters if 1589 - primal space is unchanged 1590 AND 1591 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1592 AND 1593 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1594 */ 1595 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1596 PetscFunctionReturn(0); 1597 } 1598 /* destroy old objects */ 1599 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1600 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1601 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1602 /* Set Non-overlapping dimensions */ 1603 n_B = pcis->n_B; 1604 n_D = pcis->n - n_B; 1605 n_vertices = pcbddc->n_vertices; 1606 1607 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1608 1609 /* create auxiliary bitmask and allocate workspace */ 1610 if (!sub_schurs->reuse_mumps) { 1611 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1612 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1613 for (i=0;i<n_vertices;i++) { 1614 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1615 } 1616 1617 for (i=0, n_R=0; i<pcis->n; i++) { 1618 if (!PetscBTLookup(bitmask,i)) { 1619 idx_R_local[n_R++] = i; 1620 } 1621 } 1622 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1623 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1624 1625 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1626 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1627 } 1628 1629 /* Block code */ 1630 vbs = 1; 1631 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1632 if (bs>1 && !(n_vertices%bs)) { 1633 PetscBool is_blocked = PETSC_TRUE; 1634 PetscInt *vary; 1635 if (!sub_schurs->reuse_mumps) { 1636 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1637 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1638 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1639 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1640 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1641 for (i=0; i<pcis->n/bs; i++) { 1642 if (vary[i]!=0 && vary[i]!=bs) { 1643 is_blocked = PETSC_FALSE; 1644 break; 1645 } 1646 } 1647 ierr = PetscFree(vary);CHKERRQ(ierr); 1648 } else { 1649 /* Verify directly the R set */ 1650 for (i=0; i<n_R/bs; i++) { 1651 PetscInt j,node=idx_R_local[bs*i]; 1652 for (j=1; j<bs; j++) { 1653 if (node != idx_R_local[bs*i+j]-j) { 1654 is_blocked = PETSC_FALSE; 1655 break; 1656 } 1657 } 1658 } 1659 } 1660 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1661 vbs = bs; 1662 for (i=0;i<n_R/vbs;i++) { 1663 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1664 } 1665 } 1666 } 1667 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1668 if (sub_schurs->reuse_mumps) { 1669 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1670 1671 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1672 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1673 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1674 reuse_mumps->is_R = pcbddc->is_R_local; 1675 } else { 1676 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1677 } 1678 1679 /* print some info if requested */ 1680 if (pcbddc->dbg_flag) { 1681 PetscInt benign = 0; 1682 1683 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1684 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1685 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1686 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1688 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1690 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1691 } 1692 1693 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1694 if (!sub_schurs->reuse_mumps) { 1695 IS is_aux1,is_aux2; 1696 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1697 1698 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1699 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1700 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1701 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1702 for (i=0; i<n_D; i++) { 1703 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1704 } 1705 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1706 for (i=0, j=0; i<n_R; i++) { 1707 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1708 aux_array1[j++] = i; 1709 } 1710 } 1711 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1712 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1713 for (i=0, j=0; i<n_B; i++) { 1714 if (!PetscBTLookup(bitmask,is_indices[i])) { 1715 aux_array2[j++] = i; 1716 } 1717 } 1718 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1719 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1720 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1721 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1722 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1723 1724 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1725 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1726 for (i=0, j=0; i<n_R; i++) { 1727 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1728 aux_array1[j++] = i; 1729 } 1730 } 1731 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1732 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1733 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1734 } 1735 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1736 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1737 } else { 1738 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1739 IS tis; 1740 PetscInt schur_size; 1741 1742 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1743 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1744 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1745 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1747 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1748 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1749 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1750 } 1751 } 1752 PetscFunctionReturn(0); 1753 } 1754 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1758 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1759 { 1760 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1761 PC_IS *pcis = (PC_IS*)pc->data; 1762 PC pc_temp; 1763 Mat A_RR; 1764 MatReuse reuse; 1765 PetscScalar m_one = -1.0; 1766 PetscReal value; 1767 PetscInt n_D,n_R; 1768 PetscBool use_exact,use_exact_reduced,issbaij; 1769 PetscErrorCode ierr; 1770 /* prefixes stuff */ 1771 char dir_prefix[256],neu_prefix[256],str_level[16]; 1772 size_t len; 1773 1774 PetscFunctionBegin; 1775 1776 /* compute prefixes */ 1777 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1778 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1779 if (!pcbddc->current_level) { 1780 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1781 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1782 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1783 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1786 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1787 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1788 len -= 15; /* remove "pc_bddc_coarse_" */ 1789 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1790 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1791 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1792 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1793 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1794 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1795 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1796 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1797 } 1798 1799 /* DIRICHLET PROBLEM */ 1800 if (dirichlet) { 1801 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1802 if (pcbddc->local_mat->symmetric_set) { 1803 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1804 } 1805 /* Matrix for Dirichlet problem is pcis->A_II */ 1806 n_D = pcis->n - pcis->n_B; 1807 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1808 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1809 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1810 /* default */ 1811 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1812 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1813 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1814 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1815 if (issbaij) { 1816 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1817 } else { 1818 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1819 } 1820 /* Allow user's customization */ 1821 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1822 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1823 } 1824 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1825 if (sub_schurs->reuse_mumps) { 1826 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1827 1828 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1829 } 1830 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1831 if (!n_D) { 1832 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1833 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1834 } 1835 /* Set Up KSP for Dirichlet problem of BDDC */ 1836 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1837 /* set ksp_D into pcis data */ 1838 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1839 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1840 pcis->ksp_D = pcbddc->ksp_D; 1841 } 1842 1843 /* NEUMANN PROBLEM */ 1844 A_RR = 0; 1845 if (neumann) { 1846 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1847 PetscInt ibs,mbs; 1848 PetscBool issbaij; 1849 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1850 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1851 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1852 if (pcbddc->ksp_R) { /* already created ksp */ 1853 PetscInt nn_R; 1854 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1855 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1856 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1857 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1858 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1859 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1860 reuse = MAT_INITIAL_MATRIX; 1861 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1862 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1863 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1864 reuse = MAT_INITIAL_MATRIX; 1865 } else { /* safe to reuse the matrix */ 1866 reuse = MAT_REUSE_MATRIX; 1867 } 1868 } 1869 /* last check */ 1870 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1871 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1872 reuse = MAT_INITIAL_MATRIX; 1873 } 1874 } else { /* first time, so we need to create the matrix */ 1875 reuse = MAT_INITIAL_MATRIX; 1876 } 1877 /* extract A_RR */ 1878 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1879 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1880 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1881 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1882 if (matis->A == pcbddc->local_mat) { 1883 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1884 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1885 } else { 1886 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1887 } 1888 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1889 if (matis->A == pcbddc->local_mat) { 1890 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1891 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1892 } else { 1893 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1894 } 1895 } 1896 if (!sub_schurs->reuse_mumps) { 1897 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1898 if (pcbddc->local_mat->symmetric_set) { 1899 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1900 } 1901 } else { 1902 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1903 1904 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1905 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1906 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1907 } 1908 if (!pcbddc->ksp_R) { /* create object if not present */ 1909 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1910 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1911 /* default */ 1912 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1913 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1914 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1915 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1916 if (issbaij) { 1917 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1918 } else { 1919 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1920 } 1921 /* Allow user's customization */ 1922 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1923 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1924 } 1925 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1926 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1927 if (!n_R) { 1928 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1929 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1930 } 1931 /* Reuse MUMPS solver if it is present */ 1932 if (sub_schurs->reuse_mumps) { 1933 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1934 1935 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 1936 } 1937 /* Set Up KSP for Neumann problem of BDDC */ 1938 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1939 } 1940 /* free Neumann problem's matrix */ 1941 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1942 1943 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1944 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1945 if (pcbddc->dbg_flag) { 1946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1947 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1949 } 1950 if (dirichlet) { /* Dirichlet */ 1951 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1952 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1953 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1954 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1955 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1956 /* need to be adapted? */ 1957 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1958 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1959 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1960 /* print info */ 1961 if (pcbddc->dbg_flag) { 1962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1963 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1964 } 1965 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1966 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 1967 } 1968 } 1969 if (neumann) { /* Neumann */ 1970 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 1971 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1972 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1973 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1974 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1975 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1976 /* need to be adapted? */ 1977 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1978 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1979 /* print info */ 1980 if (pcbddc->dbg_flag) { 1981 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1982 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1983 } 1984 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1985 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 1986 } 1987 } 1988 } 1989 PetscFunctionReturn(0); 1990 } 1991 1992 #undef __FUNCT__ 1993 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1994 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1995 { 1996 PetscErrorCode ierr; 1997 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1998 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1999 2000 PetscFunctionBegin; 2001 if (!sub_schurs->reuse_mumps) { 2002 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2003 } 2004 if (!pcbddc->switch_static) { 2005 if (applytranspose && pcbddc->local_auxmat1) { 2006 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2007 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2008 } 2009 if (!sub_schurs->reuse_mumps) { 2010 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2011 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2012 } else { 2013 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2014 2015 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 } 2018 } else { 2019 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2020 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2021 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2022 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2023 if (applytranspose && pcbddc->local_auxmat1) { 2024 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2025 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2026 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2027 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2028 } 2029 } 2030 if (!sub_schurs->reuse_mumps) { 2031 if (applytranspose) { 2032 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2033 } else { 2034 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2035 } 2036 #if defined(PETSC_HAVE_MUMPS) 2037 } else { 2038 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2039 2040 if (applytranspose) { 2041 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2042 } else { 2043 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2044 } 2045 #endif 2046 } 2047 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2048 if (!pcbddc->switch_static) { 2049 if (!sub_schurs->reuse_mumps) { 2050 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2051 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2052 } else { 2053 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2054 2055 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2056 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2057 } 2058 if (!applytranspose && pcbddc->local_auxmat1) { 2059 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2060 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2061 } 2062 } else { 2063 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2064 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2067 if (!applytranspose && pcbddc->local_auxmat1) { 2068 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2069 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2070 } 2071 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2072 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2073 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2074 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2080 #undef __FUNCT__ 2081 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2082 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2083 { 2084 PetscErrorCode ierr; 2085 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2086 PC_IS* pcis = (PC_IS*) (pc->data); 2087 const PetscScalar zero = 0.0; 2088 2089 PetscFunctionBegin; 2090 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2091 if (applytranspose) { 2092 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2093 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2094 } else { 2095 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2096 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2097 } 2098 2099 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2100 if (pcbddc->benign_p0_lidx >= 0) { 2101 PetscScalar *array; 2102 2103 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2104 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2105 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2106 } 2107 2108 /* start communications from local primal nodes to rhs of coarse solver */ 2109 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2110 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 2113 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2114 /* TODO remove null space when doing multilevel */ 2115 if (pcbddc->coarse_ksp) { 2116 Vec rhs,sol; 2117 2118 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2119 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2120 if (applytranspose) { 2121 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2122 } else { 2123 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2124 } 2125 } 2126 2127 /* Local solution on R nodes */ 2128 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2129 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2130 } 2131 2132 /* communications from coarse sol to local primal nodes */ 2133 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2134 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2135 2136 /* Sum contributions from two levels */ 2137 if (applytranspose) { 2138 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2139 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2140 } else { 2141 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2142 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2143 } 2144 /* store p0 */ 2145 if (pcbddc->benign_p0_lidx >= 0) { 2146 PetscScalar *array; 2147 2148 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2149 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2150 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2151 } 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2157 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2158 { 2159 PetscErrorCode ierr; 2160 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2161 PetscScalar *array; 2162 Vec from,to; 2163 2164 PetscFunctionBegin; 2165 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2166 from = pcbddc->coarse_vec; 2167 to = pcbddc->vec1_P; 2168 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2169 Vec tvec; 2170 2171 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2172 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2173 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2174 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2175 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2176 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2177 } 2178 } else { /* from local to global -> put data in coarse right hand side */ 2179 from = pcbddc->vec1_P; 2180 to = pcbddc->coarse_vec; 2181 } 2182 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2188 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2189 { 2190 PetscErrorCode ierr; 2191 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2192 PetscScalar *array; 2193 Vec from,to; 2194 2195 PetscFunctionBegin; 2196 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2197 from = pcbddc->coarse_vec; 2198 to = pcbddc->vec1_P; 2199 } else { /* from local to global -> put data in coarse right hand side */ 2200 from = pcbddc->vec1_P; 2201 to = pcbddc->coarse_vec; 2202 } 2203 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2204 if (smode == SCATTER_FORWARD) { 2205 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2206 Vec tvec; 2207 2208 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2209 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2210 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2211 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2212 } 2213 } else { 2214 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2215 ierr = VecResetArray(from);CHKERRQ(ierr); 2216 } 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /* uncomment for testing purposes */ 2222 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2225 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2226 { 2227 PetscErrorCode ierr; 2228 PC_IS* pcis = (PC_IS*)(pc->data); 2229 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2230 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2231 /* one and zero */ 2232 PetscScalar one=1.0,zero=0.0; 2233 /* space to store constraints and their local indices */ 2234 PetscScalar *constraints_data; 2235 PetscInt *constraints_idxs,*constraints_idxs_B; 2236 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2237 PetscInt *constraints_n; 2238 /* iterators */ 2239 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2240 /* BLAS integers */ 2241 PetscBLASInt lwork,lierr; 2242 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2243 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2244 /* reuse */ 2245 PetscInt olocal_primal_size,olocal_primal_size_cc; 2246 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2247 /* change of basis */ 2248 PetscBool qr_needed; 2249 PetscBT change_basis,qr_needed_idx; 2250 /* auxiliary stuff */ 2251 PetscInt *nnz,*is_indices; 2252 PetscInt ncc; 2253 /* some quantities */ 2254 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2255 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2256 2257 PetscFunctionBegin; 2258 /* Destroy Mat objects computed previously */ 2259 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2260 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2261 /* save info on constraints from previous setup (if any) */ 2262 olocal_primal_size = pcbddc->local_primal_size; 2263 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2264 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2265 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2266 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2267 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2268 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2269 2270 /* print some info */ 2271 if (pcbddc->dbg_flag) { 2272 IS vertices; 2273 PetscInt nv,nedges,nfaces; 2274 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2275 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2276 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2277 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2278 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2283 } 2284 2285 if (!pcbddc->adaptive_selection) { 2286 IS ISForVertices,*ISForFaces,*ISForEdges; 2287 MatNullSpace nearnullsp; 2288 const Vec *nearnullvecs; 2289 Vec *localnearnullsp; 2290 PetscScalar *array; 2291 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2292 PetscBool nnsp_has_cnst; 2293 /* LAPACK working arrays for SVD or POD */ 2294 PetscBool skip_lapack,boolforchange; 2295 PetscScalar *work; 2296 PetscReal *singular_vals; 2297 #if defined(PETSC_USE_COMPLEX) 2298 PetscReal *rwork; 2299 #endif 2300 #if defined(PETSC_MISSING_LAPACK_GESVD) 2301 PetscScalar *temp_basis,*correlation_mat; 2302 #else 2303 PetscBLASInt dummy_int=1; 2304 PetscScalar dummy_scalar=1.; 2305 #endif 2306 2307 /* Get index sets for faces, edges and vertices from graph */ 2308 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2309 /* free unneeded index sets */ 2310 if (!pcbddc->use_vertices) { 2311 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2312 } 2313 if (!pcbddc->use_edges) { 2314 for (i=0;i<n_ISForEdges;i++) { 2315 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2316 } 2317 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2318 n_ISForEdges = 0; 2319 } 2320 if (!pcbddc->use_faces) { 2321 for (i=0;i<n_ISForFaces;i++) { 2322 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2323 } 2324 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2325 n_ISForFaces = 0; 2326 } 2327 2328 #if defined(PETSC_USE_DEBUG) 2329 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2330 Also use_change_of_basis should be consistent among processors */ 2331 if (pcbddc->NullSpace) { 2332 PetscBool tbool[2],gbool[2]; 2333 2334 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2335 pcbddc->use_change_of_basis = PETSC_TRUE; 2336 if (!ISForEdges) { 2337 pcbddc->use_change_on_faces = PETSC_TRUE; 2338 } 2339 } 2340 tbool[0] = pcbddc->use_change_of_basis; 2341 tbool[1] = pcbddc->use_change_on_faces; 2342 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2343 pcbddc->use_change_of_basis = gbool[0]; 2344 pcbddc->use_change_on_faces = gbool[1]; 2345 } 2346 #endif 2347 2348 /* check if near null space is attached to global mat */ 2349 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2350 if (nearnullsp) { 2351 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2352 /* remove any stored info */ 2353 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2354 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2355 /* store information for BDDC solver reuse */ 2356 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2357 pcbddc->onearnullspace = nearnullsp; 2358 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2359 for (i=0;i<nnsp_size;i++) { 2360 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2361 } 2362 } else { /* if near null space is not provided BDDC uses constants by default */ 2363 nnsp_size = 0; 2364 nnsp_has_cnst = PETSC_TRUE; 2365 } 2366 /* get max number of constraints on a single cc */ 2367 max_constraints = nnsp_size; 2368 if (nnsp_has_cnst) max_constraints++; 2369 2370 /* 2371 Evaluate maximum storage size needed by the procedure 2372 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2373 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2374 There can be multiple constraints per connected component 2375 */ 2376 n_vertices = 0; 2377 if (ISForVertices) { 2378 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2379 } 2380 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2381 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2382 2383 total_counts = n_ISForFaces+n_ISForEdges; 2384 total_counts *= max_constraints; 2385 total_counts += n_vertices; 2386 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2387 2388 total_counts = 0; 2389 max_size_of_constraint = 0; 2390 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2391 IS used_is; 2392 if (i<n_ISForEdges) { 2393 used_is = ISForEdges[i]; 2394 } else { 2395 used_is = ISForFaces[i-n_ISForEdges]; 2396 } 2397 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2398 total_counts += j; 2399 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2400 } 2401 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2402 2403 /* get local part of global near null space vectors */ 2404 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2405 for (k=0;k<nnsp_size;k++) { 2406 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2407 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2408 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2409 } 2410 2411 /* whether or not to skip lapack calls */ 2412 skip_lapack = PETSC_TRUE; 2413 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2414 2415 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2416 if (!skip_lapack) { 2417 PetscScalar temp_work; 2418 2419 #if defined(PETSC_MISSING_LAPACK_GESVD) 2420 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2421 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2422 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2423 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2424 #if defined(PETSC_USE_COMPLEX) 2425 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2426 #endif 2427 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2428 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2429 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2430 lwork = -1; 2431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2432 #if !defined(PETSC_USE_COMPLEX) 2433 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2434 #else 2435 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2436 #endif 2437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2438 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2439 #else /* on missing GESVD */ 2440 /* SVD */ 2441 PetscInt max_n,min_n; 2442 max_n = max_size_of_constraint; 2443 min_n = max_constraints; 2444 if (max_size_of_constraint < max_constraints) { 2445 min_n = max_size_of_constraint; 2446 max_n = max_constraints; 2447 } 2448 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2449 #if defined(PETSC_USE_COMPLEX) 2450 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2451 #endif 2452 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2453 lwork = -1; 2454 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2455 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2456 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2457 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2458 #if !defined(PETSC_USE_COMPLEX) 2459 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2460 #else 2461 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2462 #endif 2463 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2464 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2465 #endif /* on missing GESVD */ 2466 /* Allocate optimal workspace */ 2467 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2468 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2469 } 2470 /* Now we can loop on constraining sets */ 2471 total_counts = 0; 2472 constraints_idxs_ptr[0] = 0; 2473 constraints_data_ptr[0] = 0; 2474 /* vertices */ 2475 if (n_vertices) { 2476 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2477 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2478 for (i=0;i<n_vertices;i++) { 2479 constraints_n[total_counts] = 1; 2480 constraints_data[total_counts] = 1.0; 2481 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2482 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2483 total_counts++; 2484 } 2485 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2486 n_vertices = total_counts; 2487 } 2488 2489 /* edges and faces */ 2490 total_counts_cc = total_counts; 2491 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2492 IS used_is; 2493 PetscBool idxs_copied = PETSC_FALSE; 2494 2495 if (ncc<n_ISForEdges) { 2496 used_is = ISForEdges[ncc]; 2497 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2498 } else { 2499 used_is = ISForFaces[ncc-n_ISForEdges]; 2500 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2501 } 2502 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2503 2504 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2505 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2506 /* change of basis should not be performed on local periodic nodes */ 2507 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2508 if (nnsp_has_cnst) { 2509 PetscScalar quad_value; 2510 2511 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2512 idxs_copied = PETSC_TRUE; 2513 2514 if (!pcbddc->use_nnsp_true) { 2515 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2516 } else { 2517 quad_value = 1.0; 2518 } 2519 for (j=0;j<size_of_constraint;j++) { 2520 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2521 } 2522 temp_constraints++; 2523 total_counts++; 2524 } 2525 for (k=0;k<nnsp_size;k++) { 2526 PetscReal real_value; 2527 PetscScalar *ptr_to_data; 2528 2529 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2530 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2531 for (j=0;j<size_of_constraint;j++) { 2532 ptr_to_data[j] = array[is_indices[j]]; 2533 } 2534 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2535 /* check if array is null on the connected component */ 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2537 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2538 if (real_value > 0.0) { /* keep indices and values */ 2539 temp_constraints++; 2540 total_counts++; 2541 if (!idxs_copied) { 2542 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2543 idxs_copied = PETSC_TRUE; 2544 } 2545 } 2546 } 2547 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2548 valid_constraints = temp_constraints; 2549 if (!pcbddc->use_nnsp_true && temp_constraints) { 2550 if (temp_constraints == 1) { /* just normalize the constraint */ 2551 PetscScalar norm,*ptr_to_data; 2552 2553 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2555 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2556 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2557 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2558 } else { /* perform SVD */ 2559 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2560 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2561 2562 #if defined(PETSC_MISSING_LAPACK_GESVD) 2563 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2564 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2565 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2566 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2567 from that computed using LAPACKgesvd 2568 -> This is due to a different computation of eigenvectors in LAPACKheev 2569 -> The quality of the POD-computed basis will be the same */ 2570 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2571 /* Store upper triangular part of correlation matrix */ 2572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2573 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2574 for (j=0;j<temp_constraints;j++) { 2575 for (k=0;k<j+1;k++) { 2576 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2577 } 2578 } 2579 /* compute eigenvalues and eigenvectors of correlation matrix */ 2580 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2582 #if !defined(PETSC_USE_COMPLEX) 2583 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2584 #else 2585 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2586 #endif 2587 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2588 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2589 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2590 j = 0; 2591 while (j < temp_constraints && singular_vals[j] < tol) j++; 2592 total_counts = total_counts-j; 2593 valid_constraints = temp_constraints-j; 2594 /* scale and copy POD basis into used quadrature memory */ 2595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2596 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2598 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2599 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2601 if (j<temp_constraints) { 2602 PetscInt ii; 2603 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2604 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2605 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 for (k=0;k<temp_constraints-j;k++) { 2608 for (ii=0;ii<size_of_constraint;ii++) { 2609 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2610 } 2611 } 2612 } 2613 #else /* on missing GESVD */ 2614 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2615 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2617 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2618 #if !defined(PETSC_USE_COMPLEX) 2619 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2620 #else 2621 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2622 #endif 2623 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2624 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2625 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2626 k = temp_constraints; 2627 if (k > size_of_constraint) k = size_of_constraint; 2628 j = 0; 2629 while (j < k && singular_vals[k-j-1] < tol) j++; 2630 valid_constraints = k-j; 2631 total_counts = total_counts-temp_constraints+valid_constraints; 2632 #endif /* on missing GESVD */ 2633 } 2634 } 2635 /* update pointers information */ 2636 if (valid_constraints) { 2637 constraints_n[total_counts_cc] = valid_constraints; 2638 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2639 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2640 /* set change_of_basis flag */ 2641 if (boolforchange) { 2642 PetscBTSet(change_basis,total_counts_cc); 2643 } 2644 total_counts_cc++; 2645 } 2646 } 2647 /* free workspace */ 2648 if (!skip_lapack) { 2649 ierr = PetscFree(work);CHKERRQ(ierr); 2650 #if defined(PETSC_USE_COMPLEX) 2651 ierr = PetscFree(rwork);CHKERRQ(ierr); 2652 #endif 2653 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2654 #if defined(PETSC_MISSING_LAPACK_GESVD) 2655 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2656 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2657 #endif 2658 } 2659 for (k=0;k<nnsp_size;k++) { 2660 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2661 } 2662 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2663 /* free index sets of faces, edges and vertices */ 2664 for (i=0;i<n_ISForFaces;i++) { 2665 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2666 } 2667 if (n_ISForFaces) { 2668 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2669 } 2670 for (i=0;i<n_ISForEdges;i++) { 2671 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2672 } 2673 if (n_ISForEdges) { 2674 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2675 } 2676 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2677 } else { 2678 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2679 2680 total_counts = 0; 2681 n_vertices = 0; 2682 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2683 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2684 } 2685 max_constraints = 0; 2686 total_counts_cc = 0; 2687 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2688 total_counts += pcbddc->adaptive_constraints_n[i]; 2689 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2690 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2691 } 2692 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2693 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2694 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2695 constraints_data = pcbddc->adaptive_constraints_data; 2696 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2697 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2698 total_counts_cc = 0; 2699 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2700 if (pcbddc->adaptive_constraints_n[i]) { 2701 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2702 } 2703 } 2704 #if 0 2705 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2706 for (i=0;i<total_counts_cc;i++) { 2707 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2708 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2709 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2710 printf(" %d",constraints_idxs[j]); 2711 } 2712 printf("\n"); 2713 printf("number of cc: %d\n",constraints_n[i]); 2714 } 2715 for (i=0;i<n_vertices;i++) { 2716 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2717 } 2718 for (i=0;i<sub_schurs->n_subs;i++) { 2719 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2720 } 2721 #endif 2722 2723 max_size_of_constraint = 0; 2724 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2725 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2726 /* Change of basis */ 2727 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2728 if (pcbddc->use_change_of_basis) { 2729 for (i=0;i<sub_schurs->n_subs;i++) { 2730 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2731 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2732 } 2733 } 2734 } 2735 } 2736 pcbddc->local_primal_size = total_counts; 2737 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2738 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2739 2740 /* map constraints_idxs in boundary numbering */ 2741 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2742 if (i != constraints_idxs_ptr[total_counts_cc]) { 2743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2744 } 2745 2746 /* Create constraint matrix */ 2747 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2748 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2749 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2750 2751 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2752 /* determine if a QR strategy is needed for change of basis */ 2753 qr_needed = PETSC_FALSE; 2754 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2755 total_primal_vertices=0; 2756 pcbddc->local_primal_size_cc = 0; 2757 for (i=0;i<total_counts_cc;i++) { 2758 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2759 if (size_of_constraint == 1) { 2760 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2761 pcbddc->local_primal_size_cc += 1; 2762 } else if (PetscBTLookup(change_basis,i)) { 2763 for (k=0;k<constraints_n[i];k++) { 2764 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2765 } 2766 pcbddc->local_primal_size_cc += constraints_n[i]; 2767 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2768 PetscBTSet(qr_needed_idx,i); 2769 qr_needed = PETSC_TRUE; 2770 } 2771 } else { 2772 pcbddc->local_primal_size_cc += 1; 2773 } 2774 } 2775 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2776 pcbddc->n_vertices = total_primal_vertices; 2777 /* permute indices in order to have a sorted set of vertices */ 2778 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2779 2780 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2781 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2782 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2783 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2784 2785 /* nonzero structure of constraint matrix */ 2786 /* and get reference dof for local constraints */ 2787 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2788 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2789 2790 j = total_primal_vertices; 2791 total_counts = total_primal_vertices; 2792 cum = total_primal_vertices; 2793 for (i=n_vertices;i<total_counts_cc;i++) { 2794 if (!PetscBTLookup(change_basis,i)) { 2795 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2796 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2797 cum++; 2798 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2799 for (k=0;k<constraints_n[i];k++) { 2800 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2801 nnz[j+k] = size_of_constraint; 2802 } 2803 j += constraints_n[i]; 2804 } 2805 } 2806 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2807 ierr = PetscFree(nnz);CHKERRQ(ierr); 2808 2809 /* set values in constraint matrix */ 2810 for (i=0;i<total_primal_vertices;i++) { 2811 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2812 } 2813 total_counts = total_primal_vertices; 2814 for (i=n_vertices;i<total_counts_cc;i++) { 2815 if (!PetscBTLookup(change_basis,i)) { 2816 PetscInt *cols; 2817 2818 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2819 cols = constraints_idxs+constraints_idxs_ptr[i]; 2820 for (k=0;k<constraints_n[i];k++) { 2821 PetscInt row = total_counts+k; 2822 PetscScalar *vals; 2823 2824 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2825 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2826 } 2827 total_counts += constraints_n[i]; 2828 } 2829 } 2830 /* assembling */ 2831 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2832 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 2834 /* 2835 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2836 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2837 */ 2838 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2839 if (pcbddc->use_change_of_basis) { 2840 /* dual and primal dofs on a single cc */ 2841 PetscInt dual_dofs,primal_dofs; 2842 /* working stuff for GEQRF */ 2843 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2844 PetscBLASInt lqr_work; 2845 /* working stuff for UNGQR */ 2846 PetscScalar *gqr_work,lgqr_work_t; 2847 PetscBLASInt lgqr_work; 2848 /* working stuff for TRTRS */ 2849 PetscScalar *trs_rhs; 2850 PetscBLASInt Blas_NRHS; 2851 /* pointers for values insertion into change of basis matrix */ 2852 PetscInt *start_rows,*start_cols; 2853 PetscScalar *start_vals; 2854 /* working stuff for values insertion */ 2855 PetscBT is_primal; 2856 PetscInt *aux_primal_numbering_B; 2857 /* matrix sizes */ 2858 PetscInt global_size,local_size; 2859 /* temporary change of basis */ 2860 Mat localChangeOfBasisMatrix; 2861 /* extra space for debugging */ 2862 PetscScalar *dbg_work; 2863 2864 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2865 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2866 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2867 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2868 /* nonzeros for local mat */ 2869 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2870 for (i=0;i<pcis->n;i++) nnz[i]=1; 2871 for (i=n_vertices;i<total_counts_cc;i++) { 2872 if (PetscBTLookup(change_basis,i)) { 2873 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2874 if (PetscBTLookup(qr_needed_idx,i)) { 2875 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2876 } else { 2877 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2878 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2879 } 2880 } 2881 } 2882 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2883 ierr = PetscFree(nnz);CHKERRQ(ierr); 2884 /* Set initial identity in the matrix */ 2885 for (i=0;i<pcis->n;i++) { 2886 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2887 } 2888 2889 if (pcbddc->dbg_flag) { 2890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2891 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2892 } 2893 2894 2895 /* Now we loop on the constraints which need a change of basis */ 2896 /* 2897 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2898 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2899 2900 Basic blocks of change of basis matrix T computed by 2901 2902 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2903 2904 | 1 0 ... 0 s_1/S | 2905 | 0 1 ... 0 s_2/S | 2906 | ... | 2907 | 0 ... 1 s_{n-1}/S | 2908 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2909 2910 with S = \sum_{i=1}^n s_i^2 2911 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2912 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2913 2914 - QR decomposition of constraints otherwise 2915 */ 2916 if (qr_needed) { 2917 /* space to store Q */ 2918 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2919 /* first we issue queries for optimal work */ 2920 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2921 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2922 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2923 lqr_work = -1; 2924 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2925 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2926 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2927 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2928 lgqr_work = -1; 2929 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2931 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2932 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2933 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2934 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2935 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2936 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2937 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2938 /* array to store scaling factors for reflectors */ 2939 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2940 /* array to store rhs and solution of triangular solver */ 2941 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2942 /* allocating workspace for check */ 2943 if (pcbddc->dbg_flag) { 2944 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2945 } 2946 } 2947 /* array to store whether a node is primal or not */ 2948 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2949 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2950 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2951 if (i != total_primal_vertices) { 2952 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2953 } 2954 for (i=0;i<total_primal_vertices;i++) { 2955 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2956 } 2957 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2958 2959 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2960 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2961 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2962 if (PetscBTLookup(change_basis,total_counts)) { 2963 /* get constraint info */ 2964 primal_dofs = constraints_n[total_counts]; 2965 dual_dofs = size_of_constraint-primal_dofs; 2966 2967 if (pcbddc->dbg_flag) { 2968 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2969 } 2970 2971 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2972 2973 /* copy quadrature constraints for change of basis check */ 2974 if (pcbddc->dbg_flag) { 2975 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2976 } 2977 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2978 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2979 2980 /* compute QR decomposition of constraints */ 2981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2982 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2983 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2984 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2985 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2986 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2987 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2988 2989 /* explictly compute R^-T */ 2990 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2991 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2992 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2993 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2994 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2995 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2996 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2997 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2998 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2999 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3000 3001 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3002 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3003 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3004 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3006 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3007 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3008 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3009 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3010 3011 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3012 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3013 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3014 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3015 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3016 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3018 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3020 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3021 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3022 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3023 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3024 3025 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3026 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3027 /* insert cols for primal dofs */ 3028 for (j=0;j<primal_dofs;j++) { 3029 start_vals = &qr_basis[j*size_of_constraint]; 3030 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3031 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3032 } 3033 /* insert cols for dual dofs */ 3034 for (j=0,k=0;j<dual_dofs;k++) { 3035 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3036 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3037 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3038 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3039 j++; 3040 } 3041 } 3042 3043 /* check change of basis */ 3044 if (pcbddc->dbg_flag) { 3045 PetscInt ii,jj; 3046 PetscBool valid_qr=PETSC_TRUE; 3047 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3048 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3052 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3053 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3054 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3055 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3056 for (jj=0;jj<size_of_constraint;jj++) { 3057 for (ii=0;ii<primal_dofs;ii++) { 3058 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3059 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3060 } 3061 } 3062 if (!valid_qr) { 3063 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3064 for (jj=0;jj<size_of_constraint;jj++) { 3065 for (ii=0;ii<primal_dofs;ii++) { 3066 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3067 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3068 } 3069 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3070 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3071 } 3072 } 3073 } 3074 } else { 3075 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3076 } 3077 } 3078 } else { /* simple transformation block */ 3079 PetscInt row,col; 3080 PetscScalar val,norm; 3081 3082 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3083 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3084 for (j=0;j<size_of_constraint;j++) { 3085 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3086 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3087 if (!PetscBTLookup(is_primal,row_B)) { 3088 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3089 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3090 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3091 } else { 3092 for (k=0;k<size_of_constraint;k++) { 3093 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3094 if (row != col) { 3095 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3096 } else { 3097 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3098 } 3099 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3100 } 3101 } 3102 } 3103 if (pcbddc->dbg_flag) { 3104 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3105 } 3106 } 3107 } else { 3108 if (pcbddc->dbg_flag) { 3109 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3110 } 3111 } 3112 } 3113 3114 /* free workspace */ 3115 if (qr_needed) { 3116 if (pcbddc->dbg_flag) { 3117 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3118 } 3119 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3120 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3121 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3122 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3123 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3124 } 3125 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3126 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3127 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3128 3129 /* assembling of global change of variable */ 3130 { 3131 Mat tmat; 3132 PetscInt bs; 3133 3134 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3135 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3136 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3137 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3138 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3139 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3140 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3141 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3142 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3143 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3144 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3145 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3146 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3147 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3148 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3149 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3150 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3151 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3152 } 3153 /* check */ 3154 if (pcbddc->dbg_flag) { 3155 PetscReal error; 3156 Vec x,x_change; 3157 3158 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3159 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3160 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3161 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3162 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3163 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3164 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3165 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3166 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3167 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3168 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3169 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3172 ierr = VecDestroy(&x);CHKERRQ(ierr); 3173 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3174 } 3175 3176 /* adapt sub_schurs computed (if any) */ 3177 if (pcbddc->use_deluxe_scaling) { 3178 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3179 if (sub_schurs->S_Ej_all) { 3180 Mat S_new,tmat; 3181 ISLocalToGlobalMapping NtoSall; 3182 IS is_all_N,is_V,is_V_Sall; 3183 const PetscScalar *array; 3184 const PetscInt *idxs_V,*idxs_all; 3185 PetscInt i,n_V; 3186 3187 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3188 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3189 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3190 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3191 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3192 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3193 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3194 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3195 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3196 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3197 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3198 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3199 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3200 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3201 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3202 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3203 for (i=0;i<n_V;i++) { 3204 PetscScalar val; 3205 PetscInt idx; 3206 3207 idx = idxs_V[i]; 3208 val = array[idxs_all[idxs_V[i]]]; 3209 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3210 } 3211 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3212 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3213 sub_schurs->S_Ej_all = S_new; 3214 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3215 if (sub_schurs->sum_S_Ej_all) { 3216 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3217 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3218 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3219 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3220 sub_schurs->sum_S_Ej_all = S_new; 3221 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3222 } 3223 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3224 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3225 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3226 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3227 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3228 } 3229 } 3230 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3231 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3232 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3233 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3234 } 3235 3236 /* set up change of basis context */ 3237 if (pcbddc->ChangeOfBasisMatrix) { 3238 PCBDDCChange_ctx change_ctx; 3239 3240 if (!pcbddc->new_global_mat) { 3241 PetscInt global_size,local_size; 3242 3243 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3244 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3245 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3246 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3247 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3248 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3249 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3250 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3251 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3252 } else { 3253 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3254 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3255 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3256 } 3257 if (!pcbddc->user_ChangeOfBasisMatrix) { 3258 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3259 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3260 } else { 3261 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3262 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3263 } 3264 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3265 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3266 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3268 } 3269 3270 /* add pressure dof to set of primal nodes for numbering purposes */ 3271 if (pcbddc->benign_p0_lidx >= 0) { 3272 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3273 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3274 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3275 pcbddc->local_primal_size_cc++; 3276 pcbddc->local_primal_size++; 3277 } 3278 3279 /* check if a new primal space has been introduced (also take into account benign trick) */ 3280 pcbddc->new_primal_space_local = PETSC_TRUE; 3281 if (olocal_primal_size == pcbddc->local_primal_size) { 3282 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3283 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3284 if (!pcbddc->new_primal_space_local) { 3285 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3286 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3287 } 3288 } 3289 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3290 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3291 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3292 3293 /* flush dbg viewer */ 3294 if (pcbddc->dbg_flag) { 3295 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3296 } 3297 3298 /* free workspace */ 3299 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3300 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3301 if (!pcbddc->adaptive_selection) { 3302 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3303 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3304 } else { 3305 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3306 pcbddc->adaptive_constraints_idxs_ptr, 3307 pcbddc->adaptive_constraints_data_ptr, 3308 pcbddc->adaptive_constraints_idxs, 3309 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3310 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3311 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3312 } 3313 PetscFunctionReturn(0); 3314 } 3315 3316 #undef __FUNCT__ 3317 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3318 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3319 { 3320 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3321 PC_IS *pcis = (PC_IS*)pc->data; 3322 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3323 PetscInt ierr,i,vertex_size,N; 3324 PetscViewer viewer=pcbddc->dbg_viewer; 3325 3326 PetscFunctionBegin; 3327 /* Reset previously computed graph */ 3328 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3329 /* Init local Graph struct */ 3330 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3331 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3332 3333 /* Check validity of the csr graph passed in by the user */ 3334 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3335 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3336 } 3337 3338 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3339 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3340 PetscInt *xadj,*adjncy; 3341 PetscInt nvtxs; 3342 PetscBool flg_row=PETSC_FALSE; 3343 3344 if (pcbddc->use_local_adj) { 3345 3346 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3347 if (flg_row) { 3348 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3349 pcbddc->computed_rowadj = PETSC_TRUE; 3350 } 3351 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3352 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3353 IS is_dummy; 3354 ISLocalToGlobalMapping l2gmap_dummy; 3355 PetscInt j,sum; 3356 PetscInt *cxadj,*cadjncy; 3357 const PetscInt *idxs; 3358 PCBDDCGraph graph; 3359 PetscBT is_on_boundary; 3360 3361 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3362 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3363 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3364 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3365 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3366 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3367 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3368 if (flg_row) { 3369 graph->xadj = xadj; 3370 graph->adjncy = adjncy; 3371 } 3372 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3373 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3374 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3375 3376 if (pcbddc->dbg_flag) { 3377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3378 for (i=0;i<graph->ncc;i++) { 3379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3380 } 3381 } 3382 3383 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3384 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3385 for (i=0;i<pcis->n_B;i++) { 3386 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3387 } 3388 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3389 3390 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3391 sum = 0; 3392 for (i=0;i<graph->ncc;i++) { 3393 PetscInt sizecc = 0; 3394 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3395 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3396 sizecc++; 3397 } 3398 } 3399 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3400 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3401 cxadj[graph->queue[j]] = sizecc; 3402 } 3403 } 3404 sum += sizecc*sizecc; 3405 } 3406 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3407 sum = 0; 3408 for (i=0;i<pcis->n;i++) { 3409 PetscInt temp = cxadj[i]; 3410 cxadj[i] = sum; 3411 sum += temp; 3412 } 3413 cxadj[pcis->n] = sum; 3414 for (i=0;i<graph->ncc;i++) { 3415 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3416 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3417 PetscInt k,sizecc = 0; 3418 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3419 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3420 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3421 sizecc++; 3422 } 3423 } 3424 } 3425 } 3426 } 3427 if (sum) { 3428 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3429 } else { 3430 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3431 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3432 } 3433 graph->xadj = 0; 3434 graph->adjncy = 0; 3435 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3436 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3437 } 3438 } 3439 if (pcbddc->dbg_flag) { 3440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3441 } 3442 3443 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3444 vertex_size = 1; 3445 if (pcbddc->user_provided_isfordofs) { 3446 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3447 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3448 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3449 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3450 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3451 } 3452 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3453 pcbddc->n_ISForDofs = 0; 3454 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3455 } 3456 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3457 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3458 } else { 3459 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3460 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3461 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3462 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3463 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3464 } 3465 } 3466 } 3467 3468 /* Setup of Graph */ 3469 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3470 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3471 } 3472 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3473 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3474 } 3475 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3476 3477 /* Graph's connected components analysis */ 3478 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3479 3480 /* print some info to stdout */ 3481 if (pcbddc->dbg_flag) { 3482 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3483 } 3484 3485 /* mark topography has done */ 3486 pcbddc->recompute_topography = PETSC_FALSE; 3487 PetscFunctionReturn(0); 3488 } 3489 3490 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "PCBDDCSubsetNumbering" 3493 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3494 { 3495 PetscSF sf; 3496 PetscLayout map; 3497 const PetscInt *idxs; 3498 PetscInt *leaf_data,*root_data,*gidxs; 3499 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3500 PetscInt n_n,nlocals,start,first_index; 3501 PetscMPIInt commsize; 3502 PetscBool first_found; 3503 PetscErrorCode ierr; 3504 3505 PetscFunctionBegin; 3506 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3507 if (subset_mult) { 3508 PetscCheckSameComm(subset,1,subset_mult,2); 3509 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3510 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3511 } 3512 /* create workspace layout for computing global indices of subset */ 3513 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3514 lbounds[0] = lbounds[1] = 0; 3515 for (i=0;i<n;i++) { 3516 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3517 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3518 } 3519 lbounds[0] = -lbounds[0]; 3520 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3521 gbounds[0] = -gbounds[0]; 3522 N = gbounds[1] - gbounds[0] + 1; 3523 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3524 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3525 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3526 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3527 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3528 3529 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3530 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3531 if (subset_mult) { 3532 const PetscInt* idxs_mult; 3533 3534 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3535 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3536 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3537 } else { 3538 for (i=0;i<n;i++) leaf_data[i] = 1; 3539 } 3540 /* local size of new subset */ 3541 n_n = 0; 3542 for (i=0;i<n;i++) n_n += leaf_data[i]; 3543 3544 /* global indexes in layout */ 3545 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3546 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3547 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3548 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3549 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3550 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3551 3552 /* reduce from leaves to roots */ 3553 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3554 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3555 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3556 3557 /* count indexes in local part of layout */ 3558 nlocals = 0; 3559 first_index = -1; 3560 first_found = PETSC_FALSE; 3561 for (i=0;i<Nl;i++) { 3562 if (!first_found && root_data[i]) { 3563 first_found = PETSC_TRUE; 3564 first_index = i; 3565 } 3566 nlocals += root_data[i]; 3567 } 3568 3569 /* cumulative of number of indexes and size of subset without holes */ 3570 #if defined(PETSC_HAVE_MPI_EXSCAN) 3571 start = 0; 3572 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3573 #else 3574 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3575 start = start-nlocals; 3576 #endif 3577 3578 if (N_n) { /* compute total size of new subset if requested */ 3579 *N_n = start + nlocals; 3580 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3581 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3582 } 3583 3584 /* adapt root data with cumulative */ 3585 if (first_found) { 3586 PetscInt old_index; 3587 3588 root_data[first_index] += start; 3589 old_index = first_index; 3590 for (i=first_index+1;i<Nl;i++) { 3591 if (root_data[i]) { 3592 root_data[i] += root_data[old_index]; 3593 old_index = i; 3594 } 3595 } 3596 } 3597 3598 /* from roots to leaves */ 3599 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3600 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3601 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3602 3603 /* create new IS with global indexes without holes */ 3604 if (subset_mult) { 3605 const PetscInt* idxs_mult; 3606 PetscInt cum; 3607 3608 cum = 0; 3609 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3610 for (i=0;i<n;i++) { 3611 PetscInt j; 3612 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3613 } 3614 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3615 } else { 3616 for (i=0;i<n;i++) { 3617 gidxs[i] = leaf_data[i]-1; 3618 } 3619 } 3620 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3621 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3622 PetscFunctionReturn(0); 3623 } 3624 3625 #undef __FUNCT__ 3626 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3627 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3628 { 3629 PetscInt i,j; 3630 PetscScalar *alphas; 3631 PetscErrorCode ierr; 3632 3633 PetscFunctionBegin; 3634 /* this implements stabilized Gram-Schmidt */ 3635 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3636 for (i=0;i<n;i++) { 3637 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3638 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3639 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3640 } 3641 ierr = PetscFree(alphas);CHKERRQ(ierr); 3642 PetscFunctionReturn(0); 3643 } 3644 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "MatISGetSubassemblingPattern" 3647 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3648 { 3649 IS ranks_send_to; 3650 PetscInt n_neighs,*neighs,*n_shared,**shared; 3651 PetscMPIInt size,rank,color; 3652 PetscInt *xadj,*adjncy; 3653 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3654 PetscInt i,local_size,threshold=0; 3655 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3656 PetscSubcomm subcomm; 3657 PetscErrorCode ierr; 3658 3659 PetscFunctionBegin; 3660 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3661 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3662 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3663 3664 /* Get info on mapping */ 3665 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3666 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3667 3668 /* build local CSR graph of subdomains' connectivity */ 3669 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3670 xadj[0] = 0; 3671 xadj[1] = PetscMax(n_neighs-1,0); 3672 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3673 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3674 3675 if (threshold) { 3676 PetscInt xadj_count = 0; 3677 for (i=1;i<n_neighs;i++) { 3678 if (n_shared[i] > threshold) { 3679 adjncy[xadj_count] = neighs[i]; 3680 adjncy_wgt[xadj_count] = n_shared[i]; 3681 xadj_count++; 3682 } 3683 } 3684 xadj[1] = xadj_count; 3685 } else { 3686 if (xadj[1]) { 3687 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3688 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3689 } 3690 } 3691 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3692 if (use_square) { 3693 for (i=0;i<xadj[1];i++) { 3694 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3695 } 3696 } 3697 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3698 3699 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3700 3701 /* 3702 Restrict work on active processes only. 3703 */ 3704 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3705 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3706 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3707 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3708 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3709 if (color) { 3710 ierr = PetscFree(xadj);CHKERRQ(ierr); 3711 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3712 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3713 } else { 3714 Mat subdomain_adj; 3715 IS new_ranks,new_ranks_contig; 3716 MatPartitioning partitioner; 3717 PetscInt prank,rstart=0,rend=0; 3718 PetscInt *is_indices,*oldranks; 3719 PetscBool aggregate; 3720 3721 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3722 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3723 prank = rank; 3724 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3725 /* 3726 for (i=0;i<size;i++) { 3727 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3728 } 3729 */ 3730 for (i=0;i<xadj[1];i++) { 3731 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3732 } 3733 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3734 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3735 if (aggregate) { 3736 PetscInt lrows,row,ncols,*cols; 3737 PetscMPIInt nrank; 3738 PetscScalar *vals; 3739 3740 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3741 lrows = 0; 3742 if (nrank<redprocs) { 3743 lrows = size/redprocs; 3744 if (nrank<size%redprocs) lrows++; 3745 } 3746 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3747 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3748 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3749 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3750 row = nrank; 3751 ncols = xadj[1]-xadj[0]; 3752 cols = adjncy; 3753 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3754 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3755 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3756 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3757 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 ierr = PetscFree(xadj);CHKERRQ(ierr); 3759 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3760 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3761 ierr = PetscFree(vals);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3764 } 3765 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3766 3767 /* Partition */ 3768 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3769 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3770 if (use_vwgt) { 3771 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3772 v_wgt[0] = local_size; 3773 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3774 } 3775 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3776 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3777 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3778 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3779 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3780 3781 /* renumber new_ranks to avoid "holes" in new set of processors */ 3782 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3783 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3784 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3785 if (!redprocs) { 3786 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3787 } else { 3788 PetscInt idxs[1]; 3789 PetscMPIInt tag; 3790 MPI_Request *reqs; 3791 3792 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3793 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3794 for (i=rstart;i<rend;i++) { 3795 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3796 } 3797 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3798 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 ierr = PetscFree(reqs);CHKERRQ(ierr); 3800 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3801 } 3802 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3803 /* clean up */ 3804 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3805 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3806 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3807 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3808 } 3809 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3810 3811 /* assemble parallel IS for sends */ 3812 i = 1; 3813 if (color) i=0; 3814 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3815 /* get back IS */ 3816 *is_sends = ranks_send_to; 3817 PetscFunctionReturn(0); 3818 } 3819 3820 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3821 3822 #undef __FUNCT__ 3823 #define __FUNCT__ "MatISSubassemble" 3824 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3825 { 3826 Mat local_mat; 3827 IS is_sends_internal; 3828 PetscInt rows,cols,new_local_rows; 3829 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3830 PetscBool ismatis,isdense,newisdense,destroy_mat; 3831 ISLocalToGlobalMapping l2gmap; 3832 PetscInt* l2gmap_indices; 3833 const PetscInt* is_indices; 3834 MatType new_local_type; 3835 /* buffers */ 3836 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3837 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3838 PetscInt *recv_buffer_idxs_local; 3839 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3840 /* MPI */ 3841 MPI_Comm comm,comm_n; 3842 PetscSubcomm subcomm; 3843 PetscMPIInt n_sends,n_recvs,commsize; 3844 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3845 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3846 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3847 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3848 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3849 PetscErrorCode ierr; 3850 3851 PetscFunctionBegin; 3852 /* TODO: add missing checks */ 3853 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3854 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3855 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3856 PetscValidLogicalCollectiveInt(mat,nis,7); 3857 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3858 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3859 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3860 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3861 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3862 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3863 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3864 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3865 PetscInt mrows,mcols,mnrows,mncols; 3866 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3867 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3868 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3869 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3870 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3871 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3872 } 3873 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3874 PetscValidLogicalCollectiveInt(mat,bs,0); 3875 /* prepare IS for sending if not provided */ 3876 if (!is_sends) { 3877 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3878 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3879 } else { 3880 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3881 is_sends_internal = is_sends; 3882 } 3883 3884 /* get comm */ 3885 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3886 3887 /* compute number of sends */ 3888 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3889 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3890 3891 /* compute number of receives */ 3892 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3893 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3894 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3895 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3896 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3897 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3898 ierr = PetscFree(iflags);CHKERRQ(ierr); 3899 3900 /* restrict comm if requested */ 3901 subcomm = 0; 3902 destroy_mat = PETSC_FALSE; 3903 if (restrict_comm) { 3904 PetscMPIInt color,subcommsize; 3905 3906 color = 0; 3907 if (restrict_full) { 3908 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3909 } else { 3910 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3911 } 3912 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3913 subcommsize = commsize - subcommsize; 3914 /* check if reuse has been requested */ 3915 if (reuse == MAT_REUSE_MATRIX) { 3916 if (*mat_n) { 3917 PetscMPIInt subcommsize2; 3918 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3919 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3920 comm_n = PetscObjectComm((PetscObject)*mat_n); 3921 } else { 3922 comm_n = PETSC_COMM_SELF; 3923 } 3924 } else { /* MAT_INITIAL_MATRIX */ 3925 PetscMPIInt rank; 3926 3927 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3928 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3929 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3930 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3931 comm_n = PetscSubcommChild(subcomm); 3932 } 3933 /* flag to destroy *mat_n if not significative */ 3934 if (color) destroy_mat = PETSC_TRUE; 3935 } else { 3936 comm_n = comm; 3937 } 3938 3939 /* prepare send/receive buffers */ 3940 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3941 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3942 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3943 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3944 if (nis) { 3945 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3946 } 3947 3948 /* Get data from local matrices */ 3949 if (!isdense) { 3950 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3951 /* TODO: See below some guidelines on how to prepare the local buffers */ 3952 /* 3953 send_buffer_vals should contain the raw values of the local matrix 3954 send_buffer_idxs should contain: 3955 - MatType_PRIVATE type 3956 - PetscInt size_of_l2gmap 3957 - PetscInt global_row_indices[size_of_l2gmap] 3958 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3959 */ 3960 } else { 3961 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3962 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 3963 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3964 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3965 send_buffer_idxs[1] = i; 3966 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3967 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3968 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3969 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3970 for (i=0;i<n_sends;i++) { 3971 ilengths_vals[is_indices[i]] = len*len; 3972 ilengths_idxs[is_indices[i]] = len+2; 3973 } 3974 } 3975 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3976 /* additional is (if any) */ 3977 if (nis) { 3978 PetscMPIInt psum; 3979 PetscInt j; 3980 for (j=0,psum=0;j<nis;j++) { 3981 PetscInt plen; 3982 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3983 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3984 psum += len+1; /* indices + lenght */ 3985 } 3986 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3987 for (j=0,psum=0;j<nis;j++) { 3988 PetscInt plen; 3989 const PetscInt *is_array_idxs; 3990 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3991 send_buffer_idxs_is[psum] = plen; 3992 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3993 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3994 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3995 psum += plen+1; /* indices + lenght */ 3996 } 3997 for (i=0;i<n_sends;i++) { 3998 ilengths_idxs_is[is_indices[i]] = psum; 3999 } 4000 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4001 } 4002 4003 buf_size_idxs = 0; 4004 buf_size_vals = 0; 4005 buf_size_idxs_is = 0; 4006 for (i=0;i<n_recvs;i++) { 4007 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4008 buf_size_vals += (PetscInt)olengths_vals[i]; 4009 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4010 } 4011 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4012 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4013 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4014 4015 /* get new tags for clean communications */ 4016 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4017 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4018 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4019 4020 /* allocate for requests */ 4021 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4022 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4023 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4024 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4025 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4026 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4027 4028 /* communications */ 4029 ptr_idxs = recv_buffer_idxs; 4030 ptr_vals = recv_buffer_vals; 4031 ptr_idxs_is = recv_buffer_idxs_is; 4032 for (i=0;i<n_recvs;i++) { 4033 source_dest = onodes[i]; 4034 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4035 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4036 ptr_idxs += olengths_idxs[i]; 4037 ptr_vals += olengths_vals[i]; 4038 if (nis) { 4039 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4040 ptr_idxs_is += olengths_idxs_is[i]; 4041 } 4042 } 4043 for (i=0;i<n_sends;i++) { 4044 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4045 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4046 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4047 if (nis) { 4048 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4049 } 4050 } 4051 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4052 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4053 4054 /* assemble new l2g map */ 4055 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4056 ptr_idxs = recv_buffer_idxs; 4057 new_local_rows = 0; 4058 for (i=0;i<n_recvs;i++) { 4059 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4060 ptr_idxs += olengths_idxs[i]; 4061 } 4062 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4063 ptr_idxs = recv_buffer_idxs; 4064 new_local_rows = 0; 4065 for (i=0;i<n_recvs;i++) { 4066 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4067 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4068 ptr_idxs += olengths_idxs[i]; 4069 } 4070 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4071 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4072 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4073 4074 /* infer new local matrix type from received local matrices type */ 4075 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4076 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4077 if (n_recvs) { 4078 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4079 ptr_idxs = recv_buffer_idxs; 4080 for (i=0;i<n_recvs;i++) { 4081 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4082 new_local_type_private = MATAIJ_PRIVATE; 4083 break; 4084 } 4085 ptr_idxs += olengths_idxs[i]; 4086 } 4087 switch (new_local_type_private) { 4088 case MATDENSE_PRIVATE: 4089 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4090 new_local_type = MATSEQAIJ; 4091 bs = 1; 4092 } else { /* if I receive only 1 dense matrix */ 4093 new_local_type = MATSEQDENSE; 4094 bs = 1; 4095 } 4096 break; 4097 case MATAIJ_PRIVATE: 4098 new_local_type = MATSEQAIJ; 4099 bs = 1; 4100 break; 4101 case MATBAIJ_PRIVATE: 4102 new_local_type = MATSEQBAIJ; 4103 break; 4104 case MATSBAIJ_PRIVATE: 4105 new_local_type = MATSEQSBAIJ; 4106 break; 4107 default: 4108 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4109 break; 4110 } 4111 } else { /* by default, new_local_type is seqdense */ 4112 new_local_type = MATSEQDENSE; 4113 bs = 1; 4114 } 4115 4116 /* create MATIS object if needed */ 4117 if (reuse == MAT_INITIAL_MATRIX) { 4118 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4119 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4120 } else { 4121 /* it also destroys the local matrices */ 4122 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4123 } 4124 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4125 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4126 4127 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4128 4129 /* Global to local map of received indices */ 4130 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4131 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4132 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4133 4134 /* restore attributes -> type of incoming data and its size */ 4135 buf_size_idxs = 0; 4136 for (i=0;i<n_recvs;i++) { 4137 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4138 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4139 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4140 } 4141 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4142 4143 /* set preallocation */ 4144 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4145 if (!newisdense) { 4146 PetscInt *new_local_nnz=0; 4147 4148 ptr_vals = recv_buffer_vals; 4149 ptr_idxs = recv_buffer_idxs_local; 4150 if (n_recvs) { 4151 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4152 } 4153 for (i=0;i<n_recvs;i++) { 4154 PetscInt j; 4155 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4156 for (j=0;j<*(ptr_idxs+1);j++) { 4157 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4158 } 4159 } else { 4160 /* TODO */ 4161 } 4162 ptr_idxs += olengths_idxs[i]; 4163 } 4164 if (new_local_nnz) { 4165 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4166 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4167 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4168 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4169 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4170 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4171 } else { 4172 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4173 } 4174 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4175 } else { 4176 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4177 } 4178 4179 /* set values */ 4180 ptr_vals = recv_buffer_vals; 4181 ptr_idxs = recv_buffer_idxs_local; 4182 for (i=0;i<n_recvs;i++) { 4183 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4184 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4185 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4186 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4187 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4188 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4189 } else { 4190 /* TODO */ 4191 } 4192 ptr_idxs += olengths_idxs[i]; 4193 ptr_vals += olengths_vals[i]; 4194 } 4195 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4196 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4197 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4198 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4199 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4200 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4201 4202 #if 0 4203 if (!restrict_comm) { /* check */ 4204 Vec lvec,rvec; 4205 PetscReal infty_error; 4206 4207 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4208 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4209 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4210 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4211 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4212 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4213 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4214 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4215 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4216 } 4217 #endif 4218 4219 /* assemble new additional is (if any) */ 4220 if (nis) { 4221 PetscInt **temp_idxs,*count_is,j,psum; 4222 4223 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4224 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4225 ptr_idxs = recv_buffer_idxs_is; 4226 psum = 0; 4227 for (i=0;i<n_recvs;i++) { 4228 for (j=0;j<nis;j++) { 4229 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4230 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4231 psum += plen; 4232 ptr_idxs += plen+1; /* shift pointer to received data */ 4233 } 4234 } 4235 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4236 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4237 for (i=1;i<nis;i++) { 4238 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4239 } 4240 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4241 ptr_idxs = recv_buffer_idxs_is; 4242 for (i=0;i<n_recvs;i++) { 4243 for (j=0;j<nis;j++) { 4244 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4245 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4246 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4247 ptr_idxs += plen+1; /* shift pointer to received data */ 4248 } 4249 } 4250 for (i=0;i<nis;i++) { 4251 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4252 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4253 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4254 } 4255 ierr = PetscFree(count_is);CHKERRQ(ierr); 4256 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4257 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4258 } 4259 /* free workspace */ 4260 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4261 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4262 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4263 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4264 if (isdense) { 4265 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4266 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4267 } else { 4268 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4269 } 4270 if (nis) { 4271 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4272 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4273 } 4274 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4275 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4276 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4277 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4278 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4279 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4280 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4281 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4282 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4283 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4284 ierr = PetscFree(onodes);CHKERRQ(ierr); 4285 if (nis) { 4286 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4287 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4288 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4289 } 4290 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4291 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4292 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4293 for (i=0;i<nis;i++) { 4294 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4295 } 4296 *mat_n = NULL; 4297 } 4298 PetscFunctionReturn(0); 4299 } 4300 4301 /* temporary hack into ksp private data structure */ 4302 #include <petsc/private/kspimpl.h> 4303 4304 #undef __FUNCT__ 4305 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4306 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4307 { 4308 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4309 PC_IS *pcis = (PC_IS*)pc->data; 4310 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4311 MatNullSpace CoarseNullSpace=NULL; 4312 ISLocalToGlobalMapping coarse_islg; 4313 IS coarse_is,*isarray; 4314 PetscInt i,im_active=-1,active_procs=-1; 4315 PetscInt nis,nisdofs,nisneu; 4316 PC pc_temp; 4317 PCType coarse_pc_type; 4318 KSPType coarse_ksp_type; 4319 PetscBool multilevel_requested,multilevel_allowed; 4320 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4321 Mat t_coarse_mat_is; 4322 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4323 PetscMPIInt all_procs; 4324 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4325 PetscBool compute_vecs = PETSC_FALSE; 4326 PetscScalar *array; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 /* Assign global numbering to coarse dofs */ 4331 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4332 PetscInt ocoarse_size; 4333 compute_vecs = PETSC_TRUE; 4334 ocoarse_size = pcbddc->coarse_size; 4335 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4336 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4337 /* see if we can avoid some work */ 4338 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4339 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4340 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4341 PC pc; 4342 PetscBool isbddc; 4343 4344 /* temporary workaround since PCBDDC does not have a reset method so far */ 4345 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4346 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4347 if (isbddc) { 4348 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4349 } 4350 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4351 coarse_reuse = PETSC_FALSE; 4352 } else { /* we can safely reuse already computed coarse matrix */ 4353 coarse_reuse = PETSC_TRUE; 4354 } 4355 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4356 coarse_reuse = PETSC_FALSE; 4357 } 4358 /* reset any subassembling information */ 4359 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4360 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4361 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4362 coarse_reuse = PETSC_TRUE; 4363 } 4364 4365 /* count "active" (i.e. with positive local size) and "void" processes */ 4366 im_active = !!(pcis->n); 4367 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4368 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4369 void_procs = all_procs-active_procs; 4370 csin_type_simple = PETSC_TRUE; 4371 redist = PETSC_FALSE; 4372 if (pcbddc->current_level && void_procs) { 4373 csin_ml = PETSC_TRUE; 4374 ncoarse_ml = void_procs; 4375 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4376 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4377 csin_ds = PETSC_TRUE; 4378 ncoarse_ds = pcbddc->redistribute_coarse; 4379 redist = PETSC_TRUE; 4380 } else { 4381 csin_ds = PETSC_TRUE; 4382 ncoarse_ds = active_procs; 4383 redist = PETSC_TRUE; 4384 } 4385 } else { 4386 csin_ml = PETSC_FALSE; 4387 ncoarse_ml = all_procs; 4388 if (void_procs) { 4389 csin_ds = PETSC_TRUE; 4390 ncoarse_ds = void_procs; 4391 csin_type_simple = PETSC_FALSE; 4392 } else { 4393 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4394 csin_ds = PETSC_TRUE; 4395 ncoarse_ds = pcbddc->redistribute_coarse; 4396 redist = PETSC_TRUE; 4397 } else { 4398 csin_ds = PETSC_FALSE; 4399 ncoarse_ds = all_procs; 4400 } 4401 } 4402 } 4403 4404 /* 4405 test if we can go multilevel: three conditions must be satisfied: 4406 - we have not exceeded the number of levels requested 4407 - we can actually subassemble the active processes 4408 - we can find a suitable number of MPI processes where we can place the subassembled problem 4409 */ 4410 multilevel_allowed = PETSC_FALSE; 4411 multilevel_requested = PETSC_FALSE; 4412 if (pcbddc->current_level < pcbddc->max_levels) { 4413 multilevel_requested = PETSC_TRUE; 4414 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4415 multilevel_allowed = PETSC_FALSE; 4416 } else { 4417 multilevel_allowed = PETSC_TRUE; 4418 } 4419 } 4420 /* determine number of process partecipating to coarse solver */ 4421 if (multilevel_allowed) { 4422 ncoarse = ncoarse_ml; 4423 csin = csin_ml; 4424 redist = PETSC_FALSE; 4425 } else { 4426 ncoarse = ncoarse_ds; 4427 csin = csin_ds; 4428 } 4429 4430 /* creates temporary l2gmap and IS for coarse indexes */ 4431 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4432 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4433 4434 /* creates temporary MATIS object for coarse matrix */ 4435 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4436 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4437 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4438 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4439 #if 0 4440 { 4441 PetscViewer viewer; 4442 char filename[256]; 4443 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4444 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4445 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4446 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4447 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4448 } 4449 #endif 4450 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4451 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4452 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4453 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4454 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4455 4456 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4457 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4458 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4459 const PetscInt *idxs; 4460 ISLocalToGlobalMapping tmap; 4461 4462 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4463 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4464 /* allocate space for temporary storage */ 4465 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4466 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4467 /* allocate for IS array */ 4468 nisdofs = pcbddc->n_ISForDofsLocal; 4469 nisneu = !!pcbddc->NeumannBoundariesLocal; 4470 nis = nisdofs + nisneu; 4471 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4472 /* dofs splitting */ 4473 for (i=0;i<nisdofs;i++) { 4474 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4475 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4476 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4477 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4478 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4479 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4480 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4481 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4482 } 4483 /* neumann boundaries */ 4484 if (pcbddc->NeumannBoundariesLocal) { 4485 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4486 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4487 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4488 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4489 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4490 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4491 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4492 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4493 } 4494 /* free memory */ 4495 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4496 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4497 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4498 } else { 4499 nis = 0; 4500 nisdofs = 0; 4501 nisneu = 0; 4502 isarray = NULL; 4503 } 4504 /* destroy no longer needed map */ 4505 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4506 4507 /* restrict on coarse candidates (if needed) */ 4508 coarse_mat_is = NULL; 4509 if (csin) { 4510 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4511 if (redist) { 4512 PetscMPIInt rank; 4513 PetscInt spc,n_spc_p1,dest[1],destsize; 4514 4515 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4516 spc = active_procs/ncoarse; 4517 n_spc_p1 = active_procs%ncoarse; 4518 if (im_active) { 4519 destsize = 1; 4520 if (rank > n_spc_p1*(spc+1)-1) { 4521 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4522 } else { 4523 dest[0] = rank/(spc+1); 4524 } 4525 } else { 4526 destsize = 0; 4527 } 4528 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4529 } else if (csin_type_simple) { 4530 PetscMPIInt rank; 4531 PetscInt issize,isidx; 4532 4533 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4534 if (im_active) { 4535 issize = 1; 4536 isidx = (PetscInt)rank; 4537 } else { 4538 issize = 0; 4539 isidx = -1; 4540 } 4541 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4542 } else { /* get a suitable subassembling pattern from MATIS code */ 4543 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4544 } 4545 4546 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4547 if (!redist || ncoarse <= void_procs) { 4548 PetscInt ncoarse_cand,tissize,*nisindices; 4549 PetscInt *coarse_candidates; 4550 const PetscInt* tisindices; 4551 4552 /* get coarse candidates' ranks in pc communicator */ 4553 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4554 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4555 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4556 if (!coarse_candidates[i]) { 4557 coarse_candidates[ncoarse_cand++]=i; 4558 } 4559 } 4560 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4561 4562 4563 if (pcbddc->dbg_flag) { 4564 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4565 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4566 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4568 for (i=0;i<ncoarse_cand;i++) { 4569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4570 } 4571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4573 } 4574 /* shift the pattern on coarse candidates */ 4575 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4576 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4577 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4578 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4579 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4580 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4581 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4582 } 4583 if (pcbddc->dbg_flag) { 4584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4585 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4586 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4588 } 4589 } 4590 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4591 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4592 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4593 } else { /* this is the last level, so use just receiving processes in subcomm */ 4594 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4595 } 4596 } else { 4597 if (pcbddc->dbg_flag) { 4598 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4599 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4601 } 4602 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4603 coarse_mat_is = t_coarse_mat_is; 4604 } 4605 4606 /* create local to global scatters for coarse problem */ 4607 if (compute_vecs) { 4608 PetscInt lrows; 4609 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4610 if (coarse_mat_is) { 4611 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4612 } else { 4613 lrows = 0; 4614 } 4615 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4616 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4617 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4618 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4619 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4620 } 4621 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4622 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4623 4624 /* set defaults for coarse KSP and PC */ 4625 if (multilevel_allowed) { 4626 coarse_ksp_type = KSPRICHARDSON; 4627 coarse_pc_type = PCBDDC; 4628 } else { 4629 coarse_ksp_type = KSPPREONLY; 4630 coarse_pc_type = PCREDUNDANT; 4631 } 4632 4633 /* print some info if requested */ 4634 if (pcbddc->dbg_flag) { 4635 if (!multilevel_allowed) { 4636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4637 if (multilevel_requested) { 4638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4639 } else if (pcbddc->max_levels) { 4640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4641 } 4642 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4643 } 4644 } 4645 4646 /* create the coarse KSP object only once with defaults */ 4647 if (coarse_mat_is) { 4648 MatReuse coarse_mat_reuse; 4649 PetscViewer dbg_viewer = NULL; 4650 if (pcbddc->dbg_flag) { 4651 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4652 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4653 } 4654 if (!pcbddc->coarse_ksp) { 4655 char prefix[256],str_level[16]; 4656 size_t len; 4657 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4658 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4659 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4660 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4661 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4662 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4663 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4664 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4665 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4666 /* prefix */ 4667 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4668 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4669 if (!pcbddc->current_level) { 4670 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4671 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4672 } else { 4673 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4674 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4675 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4676 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4677 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4678 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4679 } 4680 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4681 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4682 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4683 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4684 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4685 /* allow user customization */ 4686 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4687 } 4688 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4689 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4690 if (nisdofs) { 4691 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4692 for (i=0;i<nisdofs;i++) { 4693 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4694 } 4695 } 4696 if (nisneu) { 4697 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4698 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4699 } 4700 4701 /* get some info after set from options */ 4702 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4703 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4704 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4705 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4706 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4707 isbddc = PETSC_FALSE; 4708 } 4709 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4710 if (isredundant) { 4711 KSP inner_ksp; 4712 PC inner_pc; 4713 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4714 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4715 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4716 } 4717 4718 /* assemble coarse matrix */ 4719 if (coarse_reuse) { 4720 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4721 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4722 coarse_mat_reuse = MAT_REUSE_MATRIX; 4723 } else { 4724 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4725 } 4726 if (isbddc || isnn) { 4727 if (pcbddc->coarsening_ratio > 1) { 4728 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4729 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4730 if (pcbddc->dbg_flag) { 4731 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4732 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4733 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4734 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4735 } 4736 } 4737 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4738 } else { 4739 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4740 coarse_mat = coarse_mat_is; 4741 } 4742 } else { 4743 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4744 } 4745 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4746 4747 /* propagate symmetry info of coarse matrix */ 4748 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4749 if (pc->pmat->symmetric_set) { 4750 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4751 } 4752 if (pc->pmat->hermitian_set) { 4753 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4754 } 4755 if (pc->pmat->spd_set) { 4756 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4757 } 4758 /* set operators */ 4759 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4760 if (pcbddc->dbg_flag) { 4761 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4762 } 4763 } else { /* processes non partecipating to coarse solver (if any) */ 4764 coarse_mat = 0; 4765 } 4766 ierr = PetscFree(isarray);CHKERRQ(ierr); 4767 #if 0 4768 { 4769 PetscViewer viewer; 4770 char filename[256]; 4771 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4772 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4773 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4774 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4775 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4776 } 4777 #endif 4778 4779 /* Compute coarse null space (special handling by BDDC only) */ 4780 #if 0 4781 if (pcbddc->NullSpace) { 4782 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4783 } 4784 #endif 4785 4786 if (pcbddc->coarse_ksp) { 4787 Vec crhs,csol; 4788 PetscBool ispreonly; 4789 4790 if (CoarseNullSpace) { 4791 if (isbddc) { 4792 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4793 } else { 4794 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4795 } 4796 } 4797 /* setup coarse ksp */ 4798 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4799 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4800 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4801 /* hack */ 4802 if (!csol) { 4803 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4804 } 4805 if (!crhs) { 4806 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4807 } 4808 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4809 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4810 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4811 KSP check_ksp; 4812 KSPType check_ksp_type; 4813 PC check_pc; 4814 Vec check_vec,coarse_vec; 4815 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4816 PetscInt its; 4817 PetscBool compute_eigs; 4818 PetscReal *eigs_r,*eigs_c; 4819 PetscInt neigs; 4820 const char *prefix; 4821 4822 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4823 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4824 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4825 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4826 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4827 if (ispreonly) { 4828 check_ksp_type = KSPPREONLY; 4829 compute_eigs = PETSC_FALSE; 4830 } else { 4831 check_ksp_type = KSPGMRES; 4832 compute_eigs = PETSC_TRUE; 4833 } 4834 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4835 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4836 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4837 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4838 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4839 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4840 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4841 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4842 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4843 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4844 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4845 /* create random vec */ 4846 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4847 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4848 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4849 if (CoarseNullSpace) { 4850 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4851 } 4852 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4853 /* solve coarse problem */ 4854 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4855 if (CoarseNullSpace) { 4856 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4857 } 4858 /* set eigenvalue estimation if preonly has not been requested */ 4859 if (compute_eigs) { 4860 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4861 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4862 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4863 lambda_max = eigs_r[neigs-1]; 4864 lambda_min = eigs_r[0]; 4865 if (pcbddc->use_coarse_estimates) { 4866 if (lambda_max>lambda_min) { 4867 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4868 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4869 } 4870 } 4871 } 4872 4873 /* check coarse problem residual error */ 4874 if (pcbddc->dbg_flag) { 4875 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4876 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4877 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4878 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4879 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4880 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4881 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4882 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4883 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4884 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4885 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4886 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4887 if (compute_eigs) { 4888 PetscReal lambda_max_s,lambda_min_s; 4889 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4890 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4891 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4892 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4893 for (i=0;i<neigs;i++) { 4894 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4895 } 4896 } 4897 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4898 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4899 } 4900 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4901 if (compute_eigs) { 4902 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4903 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4904 } 4905 } 4906 } 4907 /* print additional info */ 4908 if (pcbddc->dbg_flag) { 4909 /* waits until all processes reaches this point */ 4910 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4911 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4912 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4913 } 4914 4915 /* free memory */ 4916 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4917 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4918 PetscFunctionReturn(0); 4919 } 4920 4921 #undef __FUNCT__ 4922 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4923 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4924 { 4925 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4926 PC_IS* pcis = (PC_IS*)pc->data; 4927 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4928 IS subset,subset_mult,subset_n; 4929 PetscInt local_size,coarse_size=0; 4930 PetscInt *local_primal_indices=NULL; 4931 const PetscInt *t_local_primal_indices; 4932 PetscErrorCode ierr; 4933 4934 PetscFunctionBegin; 4935 /* Compute global number of coarse dofs */ 4936 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4937 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4938 } 4939 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 4940 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 4941 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 4943 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 4944 ierr = ISDestroy(&subset);CHKERRQ(ierr); 4945 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 4946 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 4947 if (local_size != pcbddc->local_primal_size) { 4948 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4949 } 4950 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 4951 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4952 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 4953 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 4954 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 4955 4956 /* check numbering */ 4957 if (pcbddc->dbg_flag) { 4958 PetscScalar coarsesum,*array,*array2; 4959 PetscInt i; 4960 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4961 4962 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4963 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4965 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4966 /* counter */ 4967 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4968 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 4969 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4970 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4971 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4972 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4973 4974 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4975 for (i=0;i<pcbddc->local_primal_size;i++) { 4976 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4977 } 4978 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4979 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4980 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4981 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4982 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4983 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4984 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4985 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4986 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4987 for (i=0;i<pcis->n;i++) { 4988 if (array[i] != 0.0 && array[i] != array2[i]) { 4989 PetscInt owned = (PetscInt)(array[i]); 4990 PetscInt neigh = (PetscInt)(array2[i]); 4991 set_error = PETSC_TRUE; 4992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 4993 } 4994 } 4995 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 4996 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4998 for (i=0;i<pcis->n;i++) { 4999 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5000 } 5001 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5002 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5003 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5004 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5005 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5006 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5007 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5008 PetscInt *gidxs; 5009 5010 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5011 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5012 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5013 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5014 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5015 for (i=0;i<pcbddc->local_primal_size;i++) { 5016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5017 } 5018 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5019 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5020 } 5021 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5022 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5023 } 5024 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5025 /* get back data */ 5026 *coarse_size_n = coarse_size; 5027 *local_primal_indices_n = local_primal_indices; 5028 PetscFunctionReturn(0); 5029 } 5030 5031 #undef __FUNCT__ 5032 #define __FUNCT__ "PCBDDCGlobalToLocal" 5033 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5034 { 5035 IS localis_t; 5036 PetscInt i,lsize,*idxs,n; 5037 PetscScalar *vals; 5038 PetscErrorCode ierr; 5039 5040 PetscFunctionBegin; 5041 /* get indices in local ordering exploiting local to global map */ 5042 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5043 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5044 for (i=0;i<lsize;i++) vals[i] = 1.0; 5045 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5046 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5047 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5048 if (idxs) { /* multilevel guard */ 5049 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5050 } 5051 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5052 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5053 ierr = PetscFree(vals);CHKERRQ(ierr); 5054 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5055 /* now compute set in local ordering */ 5056 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5057 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5058 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5059 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5060 for (i=0,lsize=0;i<n;i++) { 5061 if (PetscRealPart(vals[i]) > 0.5) { 5062 lsize++; 5063 } 5064 } 5065 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5066 for (i=0,lsize=0;i<n;i++) { 5067 if (PetscRealPart(vals[i]) > 0.5) { 5068 idxs[lsize++] = i; 5069 } 5070 } 5071 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5072 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5073 *localis = localis_t; 5074 PetscFunctionReturn(0); 5075 } 5076 5077 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5078 #undef __FUNCT__ 5079 #define __FUNCT__ "PCBDDCMatMult_Private" 5080 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5081 { 5082 PCBDDCChange_ctx change_ctx; 5083 PetscErrorCode ierr; 5084 5085 PetscFunctionBegin; 5086 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5087 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5088 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5089 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 #undef __FUNCT__ 5094 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5095 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5096 { 5097 PCBDDCChange_ctx change_ctx; 5098 PetscErrorCode ierr; 5099 5100 PetscFunctionBegin; 5101 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5102 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5103 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5104 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 #undef __FUNCT__ 5109 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5110 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5111 { 5112 PC_IS *pcis=(PC_IS*)pc->data; 5113 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5114 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5115 Mat S_j; 5116 PetscInt *used_xadj,*used_adjncy; 5117 PetscBool free_used_adj; 5118 PetscErrorCode ierr; 5119 5120 PetscFunctionBegin; 5121 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5122 free_used_adj = PETSC_FALSE; 5123 if (pcbddc->sub_schurs_layers == -1) { 5124 used_xadj = NULL; 5125 used_adjncy = NULL; 5126 } else { 5127 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5128 used_xadj = pcbddc->mat_graph->xadj; 5129 used_adjncy = pcbddc->mat_graph->adjncy; 5130 } else if (pcbddc->computed_rowadj) { 5131 used_xadj = pcbddc->mat_graph->xadj; 5132 used_adjncy = pcbddc->mat_graph->adjncy; 5133 } else { 5134 PetscBool flg_row=PETSC_FALSE; 5135 const PetscInt *xadj,*adjncy; 5136 PetscInt nvtxs; 5137 5138 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5139 if (flg_row) { 5140 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5141 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5142 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5143 free_used_adj = PETSC_TRUE; 5144 } else { 5145 pcbddc->sub_schurs_layers = -1; 5146 used_xadj = NULL; 5147 used_adjncy = NULL; 5148 } 5149 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5150 } 5151 } 5152 5153 /* setup sub_schurs data */ 5154 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5155 if (!sub_schurs->use_mumps) { 5156 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5157 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5158 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5159 } else { 5160 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5161 PetscBool isseqaij; 5162 if (!pcbddc->use_vertices && reuse_solvers) { 5163 PetscInt n_vertices; 5164 5165 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5166 reuse_solvers = (PetscBool)!n_vertices; 5167 } 5168 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5169 if (!isseqaij) { 5170 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5171 if (matis->A == pcbddc->local_mat) { 5172 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5173 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5174 } else { 5175 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5176 } 5177 } 5178 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5179 } 5180 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5181 5182 /* free adjacency */ 5183 if (free_used_adj) { 5184 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5185 } 5186 PetscFunctionReturn(0); 5187 } 5188 5189 #undef __FUNCT__ 5190 #define __FUNCT__ "PCBDDCInitSubSchurs" 5191 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5192 { 5193 PC_IS *pcis=(PC_IS*)pc->data; 5194 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5195 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5196 PCBDDCGraph graph; 5197 PetscErrorCode ierr; 5198 5199 PetscFunctionBegin; 5200 /* attach interface graph for determining subsets */ 5201 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5202 IS verticesIS,verticescomm; 5203 PetscInt vsize,*idxs; 5204 5205 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5206 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5207 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5208 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5209 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5210 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5211 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5212 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5213 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5214 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5215 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5216 /* 5217 if (pcbddc->dbg_flag) { 5218 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5219 } 5220 */ 5221 } else { 5222 graph = pcbddc->mat_graph; 5223 } 5224 5225 /* sub_schurs init */ 5226 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5227 5228 /* free graph struct */ 5229 if (pcbddc->sub_schurs_rebuild) { 5230 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5231 } 5232 PetscFunctionReturn(0); 5233 } 5234