1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignCheck" 10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PC_IS* pcis = (PC_IS*)(pc->data); 14 IS dirIS = NULL; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 19 if (zerodiag) { 20 Mat A; 21 Vec vec3_N; 22 PetscScalar *vals; 23 const PetscInt *idxs; 24 PetscInt i,nz; 25 26 /* p0 */ 27 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 28 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 29 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 30 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 31 for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */ 32 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 33 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 34 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 35 /* v_I */ 36 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 37 for (i=0;i<nz;i++) vals[i] = 0.; 38 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 39 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 40 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 41 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 42 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 43 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 44 if (dirIS) { 45 PetscInt n; 46 47 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 49 for (i=0;i<n;i++) vals[i] = 0.; 50 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 51 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 54 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 55 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 56 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 57 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 58 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 59 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 60 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */ 61 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 62 } 63 ierr = PetscFree(vals);CHKERRQ(ierr); 64 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 65 } 66 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 67 68 /* check PCBDDCBenignGetOrSetP0 */ 69 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 70 pcbddc->benign_p0 = -PetscGlobalRank; 71 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 72 pcbddc->benign_p0 = 1; 73 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 74 if (pcbddc->benign_p0_gidx >=0 && (PetscInt)PetscRealPart(pcbddc->benign_p0) != -PetscGlobalRank) { 75 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr); 76 } 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 83 { 84 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 85 IS pressures,zerodiag; 86 PetscInt nz,n; 87 PetscBool sorted,have_null; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 92 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 93 pcbddc->benign_original_mat = pcbddc->local_mat; 94 /* if a local info on dofs is present, assumes the last field is represented by "pressures", otherwise, uses only zerodiagonal dofs (ok if the pressure block is all zero) */ 95 if (pcbddc->n_ISForDofsLocal) { 96 PetscInt p = pcbddc->n_ISForDofsLocal-1; 97 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[p]);CHKERRQ(ierr); 98 pressures = pcbddc->ISForDofsLocal[p]; 99 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 100 if (!sorted) { 101 ierr = ISSort(pressures);CHKERRQ(ierr); 102 } 103 } else { 104 pressures = NULL; 105 } 106 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 107 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 108 if (!sorted) { 109 ierr = ISSort(zerodiag);CHKERRQ(ierr); 110 } 111 /* 112 Check if all the pressure dofs have a zero diagonal 113 If not, a change of basis on pressures is not needed 114 since the local Schur complements are SPD 115 */ 116 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 117 if (pressures) { 118 PetscInt np; 119 PetscBool sameis; 120 121 sameis = PETSC_FALSE; 122 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 123 if (nz == np) { 124 const PetscInt *idxs,*pidxs; 125 126 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 127 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 128 ierr = PetscMemcmp(pidxs,idxs,np*sizeof(PetscInt),&sameis);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 130 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 131 } 132 if (!sameis) { /* destroy index sets and set nz to 0 to avoid next code branch */ 133 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 134 nz = 0; 135 } 136 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 137 } 138 if (nz) { 139 IS zerodiagc; 140 PetscScalar *array; 141 const PetscInt *idxs,*idxsc; 142 PetscInt i,*nnz; 143 144 /* TODO: add check for shared dofs and raise error */ 145 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 146 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 147 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 148 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 149 /* local change of basis for pressures */ 150 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 151 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 152 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 153 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 154 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 155 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 156 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 157 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 158 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 159 ierr = PetscFree(nnz);CHKERRQ(ierr); 160 /* set identity on velocities */ 161 for (i=0;i<n-nz;i++) { 162 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 163 } 164 /* set change on pressures */ 165 for (i=0;i<nz-1;i++) { 166 PetscScalar vals[2]; 167 PetscInt cols[2]; 168 169 /* TODO: add quadrature */ 170 cols[0] = idxs[i]; 171 cols[1] = idxs[nz-1]; 172 vals[0] = 1.; 173 vals[1] = 1.; 174 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 175 } 176 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 177 for (i=0;i<nz-1;i++) array[i] = -1.; 178 array[nz-1] = 1.; 179 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 180 ierr = PetscFree(array);CHKERRQ(ierr); 181 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 /* TODO: need optimization? */ 184 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 185 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 186 /* store local and global idxs for p0 */ 187 pcbddc->benign_p0_lidx = idxs[nz-1]; 188 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 189 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 190 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 191 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 192 } else { /* if nz == 0, destroy local IS */ 193 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 194 } 195 196 /* determines if the coarse solver will be singular or not */ 197 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 198 have_null = PETSC_TRUE; 199 if (!zerodiag && n) have_null = PETSC_FALSE; 200 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 201 *zerodiaglocal = zerodiag; 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 207 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 208 { 209 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 if (!pcbddc->benign_sf) { 214 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 215 if (pcbddc->benign_p0_gidx >= 0) { 216 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 217 } else { 218 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 219 } 220 } 221 if (get) { /* use SF to get values */ 222 PetscScalar *array; 223 224 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 225 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 226 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 228 } else { /* use VecSetValue */ 229 if (pcbddc->benign_p0_gidx >= 0) { 230 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 231 } 232 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 233 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 240 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 241 { 242 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 243 PetscErrorCode ierr; 244 245 PetscFunctionBegin; 246 /* TODO: add error checking 247 - avoid nested pop (or push) calls. 248 - cannot push before pop. 249 - cannot call this if pcbddc->local_mat is NULL 250 */ 251 if (pcbddc->benign_p0_lidx < 0) { 252 PetscFunctionReturn(0); 253 } 254 if (pop) { 255 const PetscInt *cB0_cols; 256 PetscInt cB0_ncol; 257 const PetscScalar *cB0_vals; 258 259 /* extract B_0 */ 260 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 261 pcbddc->B0_ncol = cB0_ncol; 262 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 263 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 264 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 266 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 267 /* remove rows and cols from local problem */ 268 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 269 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 270 } else { /* push */ 271 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 272 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 273 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 274 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "PCBDDCAdaptiveSelection" 282 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 283 { 284 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 285 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 286 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 287 PetscBLASInt *B_iwork,*B_ifail; 288 PetscScalar *work,lwork; 289 PetscScalar *St,*S,*eigv; 290 PetscScalar *Sarray,*Starray; 291 PetscReal *eigs,thresh; 292 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 293 PetscBool allocated_S_St; 294 #if defined(PETSC_USE_COMPLEX) 295 PetscReal *rwork; 296 #endif 297 PetscErrorCode ierr; 298 299 PetscFunctionBegin; 300 if (!sub_schurs->use_mumps) { 301 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 302 } 303 304 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 305 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 306 } 307 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 312 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 313 } 314 315 if (pcbddc->dbg_flag) { 316 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 317 } 318 319 /* max size of subsets */ 320 mss = 0; 321 for (i=0;i<sub_schurs->n_subs;i++) { 322 PetscInt subset_size; 323 324 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 325 mss = PetscMax(mss,subset_size); 326 } 327 328 /* min/max and threshold */ 329 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 330 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 331 nmax = PetscMax(nmin,nmax); 332 allocated_S_St = PETSC_FALSE; 333 if (nmin) { 334 allocated_S_St = PETSC_TRUE; 335 } 336 337 /* allocate lapack workspace */ 338 cum = cum2 = 0; 339 maxneigs = 0; 340 for (i=0;i<sub_schurs->n_subs;i++) { 341 PetscInt n,subset_size; 342 343 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 344 n = PetscMin(subset_size,nmax); 345 cum += subset_size; 346 cum2 += subset_size*n; 347 maxneigs = PetscMax(maxneigs,n); 348 } 349 if (mss) { 350 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 351 PetscBLASInt B_itype = 1; 352 PetscBLASInt B_N = mss; 353 PetscReal zero = 0.0; 354 PetscReal eps = 0.0; /* dlamch? */ 355 356 B_lwork = -1; 357 S = NULL; 358 St = NULL; 359 eigs = NULL; 360 eigv = NULL; 361 B_iwork = NULL; 362 B_ifail = NULL; 363 #if defined(PETSC_USE_COMPLEX) 364 rwork = NULL; 365 #endif 366 thresh = 1.0; 367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 368 #if defined(PETSC_USE_COMPLEX) 369 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 370 #else 371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 372 #endif 373 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 374 ierr = PetscFPTrapPop();CHKERRQ(ierr); 375 } else { 376 /* TODO */ 377 } 378 } else { 379 lwork = 0; 380 } 381 382 nv = 0; 383 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 384 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 385 } 386 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 387 if (allocated_S_St) { 388 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 389 } 390 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 391 #if defined(PETSC_USE_COMPLEX) 392 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 393 #endif 394 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 395 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 396 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 397 nv+cum,&pcbddc->adaptive_constraints_idxs, 398 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 399 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 400 401 maxneigs = 0; 402 cum = cum2 = cumarray = 0; 403 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 404 pcbddc->adaptive_constraints_data_ptr[0] = 0; 405 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 406 const PetscInt *idxs; 407 408 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 409 for (cum=0;cum<nv;cum++) { 410 pcbddc->adaptive_constraints_n[cum] = 1; 411 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 412 pcbddc->adaptive_constraints_data[cum] = 1.0; 413 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 414 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 415 } 416 cum2 = cum; 417 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 418 } 419 420 if (mss) { /* multilevel */ 421 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 422 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 423 } 424 425 for (i=0;i<sub_schurs->n_subs;i++) { 426 427 const PetscInt *idxs; 428 PetscReal infty = PETSC_MAX_REAL; 429 PetscInt j,subset_size,eigs_start = 0; 430 PetscBLASInt B_N; 431 PetscBool same_data = PETSC_FALSE; 432 433 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 434 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 435 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 436 if (sub_schurs->is_hermitian) { 437 PetscInt j,k; 438 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 439 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 440 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 441 } 442 for (j=0;j<subset_size;j++) { 443 for (k=j;k<subset_size;k++) { 444 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 445 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 446 } 447 } 448 } else { 449 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 450 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 451 } 452 } else { 453 S = Sarray + cumarray; 454 St = Starray + cumarray; 455 } 456 457 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 458 /* see if we can save some work */ 459 if (sub_schurs->n_subs == 1) { 460 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 461 } 462 463 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 464 B_neigs = 0; 465 } else { 466 /* Threshold: this is an heuristic for edges */ 467 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 468 469 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 470 PetscBLASInt B_itype = 1; 471 PetscBLASInt B_IL, B_IU; 472 PetscReal eps = -1.0; /* dlamch? */ 473 PetscInt nmin_s; 474 475 if (pcbddc->dbg_flag) { 476 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 477 } 478 479 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 480 if (thresh > 1.+PETSC_SMALL) { 481 482 /* ask for eigenvalues larger than thresh */ 483 #if defined(PETSC_USE_COMPLEX) 484 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 485 #else 486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 487 #endif 488 } else { 489 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 490 B_IL = 1; 491 #if defined(PETSC_USE_COMPLEX) 492 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 493 #else 494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 495 #endif 496 } 497 ierr = PetscFPTrapPop();CHKERRQ(ierr); 498 if (B_ierr) { 499 if (B_ierr < 0 ) { 500 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 501 } else if (B_ierr <= B_N) { 502 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 503 } else { 504 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 505 } 506 } 507 508 if (B_neigs > nmax) { 509 if (pcbddc->dbg_flag) { 510 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 511 } 512 eigs_start = B_neigs -nmax; 513 B_neigs = nmax; 514 } 515 516 nmin_s = PetscMin(nmin,B_N); 517 if (B_neigs < nmin_s) { 518 PetscBLASInt B_neigs2; 519 520 B_IU = B_N - B_neigs; 521 B_IL = B_N - nmin_s + 1; 522 if (pcbddc->dbg_flag) { 523 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 524 } 525 if (sub_schurs->is_hermitian) { 526 PetscInt j; 527 for (j=0;j<subset_size;j++) { 528 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 529 } 530 for (j=0;j<subset_size;j++) { 531 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 532 } 533 } else { 534 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 535 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 536 } 537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 538 #if defined(PETSC_USE_COMPLEX) 539 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 540 #else 541 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 542 #endif 543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 544 B_neigs += B_neigs2; 545 } 546 if (B_ierr) { 547 if (B_ierr < 0 ) { 548 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 549 } else if (B_ierr <= B_N) { 550 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 551 } else { 552 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 553 } 554 } 555 if (pcbddc->dbg_flag) { 556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 557 for (j=0;j<B_neigs;j++) { 558 if (eigs[j] == 0.0) { 559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 560 } else { 561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 562 } 563 } 564 } 565 } else { 566 /* TODO */ 567 } 568 } 569 maxneigs = PetscMax(B_neigs,maxneigs); 570 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 571 if (B_neigs) { 572 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 573 574 if (pcbddc->dbg_flag > 1) { 575 PetscInt ii; 576 for (ii=0;ii<B_neigs;ii++) { 577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 578 for (j=0;j<B_N;j++) { 579 #if defined(PETSC_USE_COMPLEX) 580 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 581 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 583 #else 584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 585 #endif 586 } 587 } 588 } 589 #if 0 590 for (j=0;j<B_neigs;j++) { 591 PetscBLASInt Blas_N,Blas_one = 1.0; 592 PetscScalar norm; 593 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 594 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 595 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 596 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 597 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 598 } else { 599 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 600 } 601 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 602 } 603 #endif 604 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 605 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 606 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 607 cum++; 608 } 609 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 610 /* shift for next computation */ 611 cumarray += subset_size*subset_size; 612 } 613 if (pcbddc->dbg_flag) { 614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 615 } 616 617 if (mss) { 618 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 619 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 620 /* destroy matrices (junk) */ 621 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 622 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 623 } 624 if (allocated_S_St) { 625 ierr = PetscFree2(S,St);CHKERRQ(ierr); 626 } 627 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 628 #if defined(PETSC_USE_COMPLEX) 629 ierr = PetscFree(rwork);CHKERRQ(ierr); 630 #endif 631 if (pcbddc->dbg_flag) { 632 PetscInt maxneigs_r; 633 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 635 } 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "PCBDDCSetUpSolvers" 641 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 642 { 643 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 644 PetscScalar *coarse_submat_vals; 645 PetscErrorCode ierr; 646 647 PetscFunctionBegin; 648 /* Setup local scatters R_to_B and (optionally) R_to_D */ 649 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 650 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 651 652 /* Setup local neumann solver ksp_R */ 653 /* PCBDDCSetUpLocalScatters should be called first! */ 654 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 655 656 /* Change global null space passed in by the user if change of basis has been requested */ 657 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 658 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 659 } 660 661 /* 662 Setup local correction and local part of coarse basis. 663 Gives back the dense local part of the coarse matrix in column major ordering 664 */ 665 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 666 667 /* Compute total number of coarse nodes and setup coarse solver */ 668 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 669 670 /* free */ 671 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "PCBDDCResetCustomization" 677 PetscErrorCode PCBDDCResetCustomization(PC pc) 678 { 679 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 680 PetscErrorCode ierr; 681 682 PetscFunctionBegin; 683 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 684 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 685 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 686 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 687 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 688 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 689 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 690 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 691 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 692 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 693 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "PCBDDCResetTopography" 699 PetscErrorCode PCBDDCResetTopography(PC pc) 700 { 701 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 702 PetscErrorCode ierr; 703 704 PetscFunctionBegin; 705 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 708 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 709 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "PCBDDCResetSolvers" 715 PetscErrorCode PCBDDCResetSolvers(PC pc) 716 { 717 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 718 PetscScalar *array; 719 PetscErrorCode ierr; 720 721 PetscFunctionBegin; 722 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 723 if (pcbddc->coarse_phi_B) { 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 725 ierr = PetscFree(array);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 730 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 731 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 732 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 733 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 734 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 735 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 736 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 737 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 738 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 739 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 740 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 741 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 742 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 743 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 745 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 746 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 747 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 748 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 749 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 750 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 751 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 752 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 753 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 754 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 760 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 761 { 762 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 763 PC_IS *pcis = (PC_IS*)pc->data; 764 VecType impVecType; 765 PetscInt n_constraints,n_R,old_size,n_benign; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 if (!pcbddc->ConstraintMatrix) { 770 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 771 } 772 /* get sizes */ 773 n_benign = 0; 774 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 775 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 776 n_R = pcis->n - pcbddc->n_vertices; 777 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 778 /* local work vectors (try to avoid unneeded work)*/ 779 /* R nodes */ 780 old_size = -1; 781 if (pcbddc->vec1_R) { 782 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 783 } 784 if (n_R != old_size) { 785 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 786 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 787 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 788 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 789 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 790 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 791 } 792 /* local primal dofs */ 793 old_size = -1; 794 if (pcbddc->vec1_P) { 795 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 796 } 797 if (pcbddc->local_primal_size != old_size) { 798 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 799 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 800 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 801 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 802 } 803 /* local explicit constraints */ 804 old_size = -1; 805 if (pcbddc->vec1_C) { 806 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 807 } 808 if (n_constraints && n_constraints != old_size) { 809 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 810 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 811 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 812 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 813 } 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "PCBDDCSetUpCorrection" 819 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 820 { 821 PetscErrorCode ierr; 822 /* pointers to pcis and pcbddc */ 823 PC_IS* pcis = (PC_IS*)pc->data; 824 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 825 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 826 /* submatrices of local problem */ 827 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 828 /* submatrices of benign trick */ 829 Mat B0_V = NULL; 830 /* submatrices of local coarse problem */ 831 Mat S_VV,S_CV,S_VC,S_CC; 832 /* working matrices */ 833 Mat C_CR; 834 /* additional working stuff */ 835 PC pc_R; 836 Mat F,B0 = NULL; 837 PetscBool isLU,isCHOL,isILU; 838 839 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 840 PetscScalar *work; 841 PetscInt *idx_V_B; 842 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 843 PetscInt i,n_R,n_D,n_B; 844 PetscBool unsymmetric_check; 845 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 846 MatType impMatType; 847 /* some shortcuts to scalars */ 848 PetscScalar one=1.0,m_one=-1.0; 849 850 PetscFunctionBegin; 851 n_benign = 0; 852 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 853 n_vertices = pcbddc->n_vertices; 854 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 855 /* Set Non-overlapping dimensions */ 856 n_B = pcis->n_B; 857 n_D = pcis->n - n_B; 858 n_R = pcis->n - n_vertices; 859 860 /* Set types for local objects needed by BDDC precondtioner */ 861 impMatType = MATSEQDENSE; 862 863 /* vertices in boundary numbering */ 864 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 865 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 866 if (i != n_vertices) { 867 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 868 } 869 870 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 871 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 873 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 875 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 877 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 879 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 880 881 unsymmetric_check = PETSC_FALSE; 882 /* allocate workspace */ 883 n = 0; 884 if (n_constraints) { 885 n += n_R*n_constraints; 886 } 887 if (n_vertices) { 888 n = PetscMax(2*n_R*n_vertices,n); 889 n = PetscMax((n_R+n_B)*n_vertices,n); 890 } 891 if (!pcbddc->symmetric_primal) { 892 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 893 unsymmetric_check = PETSC_TRUE; 894 } 895 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 896 897 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 898 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 899 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 900 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 902 if (isLU || isILU || isCHOL) { 903 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 904 } else if (sub_schurs->reuse_mumps) { 905 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 906 MatFactorType type; 907 908 F = reuse_mumps->F; 909 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 910 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 911 } else { 912 F = NULL; 913 } 914 915 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 916 if (n_constraints) { 917 Mat M1,M2,M3; 918 Mat auxmat; 919 IS is_aux; 920 PetscScalar *array,*array2; 921 922 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 923 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 924 925 /* Extract constraints on R nodes: C_{CR} */ 926 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 927 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 928 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 929 930 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 931 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 932 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 933 for (i=0;i<n_constraints;i++) { 934 const PetscScalar *row_cmat_values; 935 const PetscInt *row_cmat_indices; 936 PetscInt size_of_constraint,j; 937 938 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 939 for (j=0;j<size_of_constraint;j++) { 940 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 941 } 942 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 943 } 944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 945 if (F) { 946 Mat B; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 949 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 950 ierr = MatDestroy(&B);CHKERRQ(ierr); 951 } else { 952 PetscScalar *marr; 953 954 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 955 for (i=0;i<n_constraints;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 958 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 963 } 964 if (!pcbddc->switch_static) { 965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 966 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 967 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 968 for (i=0;i<n_constraints;i++) { 969 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 971 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 975 } 976 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 978 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 979 } else { 980 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 981 pcbddc->local_auxmat2 = local_auxmat2_R; 982 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 983 } 984 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 985 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 986 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 987 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 988 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 989 if (isCHOL) { 990 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 991 } else { 992 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 993 } 994 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 995 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 996 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 997 ierr = MatDestroy(&M2);CHKERRQ(ierr); 998 ierr = MatDestroy(&M3);CHKERRQ(ierr); 999 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1000 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1001 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1002 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1003 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1004 } 1005 /* Get submatrices from subdomain matrix */ 1006 if (n_benign) { 1007 IS dummy; 1008 Mat B0_R; 1009 PetscReal norm; 1010 PetscInt ii[2]; 1011 1012 ii[0] = 0; 1013 ii[1] = pcbddc->B0_ncol; 1014 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 1015 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1016 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1017 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1018 if (norm > PETSC_SMALL) { 1019 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1020 } 1021 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1022 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1023 } 1024 1025 if (n_vertices) { 1026 IS is_aux; 1027 1028 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1029 IS tis; 1030 1031 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1032 ierr = ISSort(tis);CHKERRQ(ierr); 1033 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1034 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1035 } else { 1036 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1037 } 1038 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1041 if (n_benign) { 1042 IS dummy; 1043 1044 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1045 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1046 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1047 } 1048 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1049 } 1050 1051 /* Matrix of coarse basis functions (local) */ 1052 if (pcbddc->coarse_phi_B) { 1053 PetscInt on_B,on_primal,on_D=n_D; 1054 if (pcbddc->coarse_phi_D) { 1055 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1056 } 1057 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1058 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1059 PetscScalar *marray; 1060 1061 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1062 ierr = PetscFree(marray);CHKERRQ(ierr); 1063 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1064 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1065 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1066 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1067 } 1068 } 1069 1070 if (!pcbddc->coarse_phi_B) { 1071 PetscScalar *marray; 1072 1073 n = n_B*pcbddc->local_primal_size; 1074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1075 n += n_D*pcbddc->local_primal_size; 1076 } 1077 if (!pcbddc->symmetric_primal) { 1078 n *= 2; 1079 } 1080 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1082 n = n_B*pcbddc->local_primal_size; 1083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1084 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1085 n += n_D*pcbddc->local_primal_size; 1086 } 1087 if (!pcbddc->symmetric_primal) { 1088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1089 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1090 n = n_B*pcbddc->local_primal_size; 1091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1092 } 1093 } else { 1094 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1095 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1096 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1097 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1098 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1099 } 1100 } 1101 } 1102 1103 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1104 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1105 const PetscInt *idxs; 1106 1107 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1108 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 1109 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1110 } 1111 1112 /* vertices */ 1113 if (n_vertices) { 1114 1115 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1116 1117 if (n_R) { 1118 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1119 PetscBLASInt B_N,B_one = 1; 1120 PetscScalar *x,*y; 1121 PetscBool isseqaij; 1122 1123 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1124 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1125 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1126 if (F) { /* TODO could be optimized for symmetric problems */ 1127 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1128 } else { 1129 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1130 for (i=0;i<n_vertices;i++) { 1131 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1132 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1133 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1134 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1135 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1136 } 1137 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1138 } 1139 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1140 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1141 if (n_constraints) { 1142 Mat B; 1143 1144 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1145 for (i=0;i<n_vertices;i++) { 1146 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1147 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1148 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1151 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1152 } 1153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1154 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1155 ierr = MatDestroy(&B);CHKERRQ(ierr); 1156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1157 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1158 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1159 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1161 ierr = MatDestroy(&B);CHKERRQ(ierr); 1162 } 1163 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1164 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1165 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1166 } 1167 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1168 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1169 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1170 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1171 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1172 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1173 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1174 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1175 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1176 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1177 } else { 1178 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1179 } 1180 if (n_benign) { 1181 const PetscScalar *vals; 1182 const PetscInt *idxs; 1183 PetscInt n,j; 1184 1185 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1186 for (j=0;j<n;j++) { 1187 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1188 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1189 } 1190 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1191 } 1192 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1193 1194 /* coarse basis functions */ 1195 for (i=0;i<n_vertices;i++) { 1196 PetscScalar *y; 1197 1198 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1199 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1200 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1201 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1202 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1203 y[n_B*i+idx_V_B[i]] = 1.0; 1204 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1205 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1206 1207 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1208 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1209 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1210 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1213 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1214 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1215 } 1216 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1217 } 1218 /* if n_R == 0 the object is not destroyed */ 1219 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1220 } 1221 1222 if (n_constraints) { 1223 Mat B; 1224 1225 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1226 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1227 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1228 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1229 if (n_vertices) { 1230 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1231 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1232 } else { 1233 Mat S_VCt; 1234 1235 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1236 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1237 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1238 } 1239 } 1240 ierr = MatDestroy(&B);CHKERRQ(ierr); 1241 /* coarse basis functions */ 1242 for (i=0;i<n_constraints;i++) { 1243 PetscScalar *y; 1244 1245 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1246 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1247 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1248 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1251 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1252 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1253 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1254 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1255 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1258 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1259 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1260 } 1261 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1262 } 1263 } 1264 if (n_constraints) { 1265 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1266 } 1267 1268 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1269 1270 /* compute other basis functions for non-symmetric problems */ 1271 if (!pcbddc->symmetric_primal) { 1272 1273 if (n_constraints) { 1274 Mat S_CCT,B_C; 1275 1276 /* this is a lazy thing */ 1277 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1278 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1279 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1280 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1281 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1282 if (n_vertices) { 1283 Mat B_V,S_VCT; 1284 1285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1286 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1287 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1288 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1289 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1290 } 1291 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1292 } else { /* if there are no constraints, reset work */ 1293 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1294 } 1295 if (n_vertices && n_R) { 1296 Mat A_VRT; 1297 PetscScalar *marray; 1298 PetscBLASInt B_N,B_one = 1; 1299 1300 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1301 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1302 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1303 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1304 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1305 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1306 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1307 } 1308 1309 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1310 for (i=0;i<pcbddc->local_primal_size;i++) { 1311 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1312 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1313 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1314 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1315 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1316 } 1317 } else { 1318 for (i=0;i<pcbddc->local_primal_size;i++) { 1319 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1320 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1321 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1322 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1324 } 1325 } 1326 /* coarse basis functions */ 1327 for (i=0;i<pcbddc->local_primal_size;i++) { 1328 PetscScalar *y; 1329 1330 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1331 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1332 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 if (i<n_vertices) { 1336 y[n_B*i+idx_V_B[i]] = 1.0; 1337 } 1338 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1339 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1340 1341 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1342 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1343 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1344 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1347 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1348 } 1349 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1350 } 1351 } 1352 /* free memory */ 1353 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1354 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1355 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1356 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1357 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1358 ierr = PetscFree(work);CHKERRQ(ierr); 1359 if (n_vertices) { 1360 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1361 } 1362 if (n_constraints) { 1363 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1364 } 1365 /* Checking coarse_sub_mat and coarse basis functios */ 1366 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1367 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1368 if (pcbddc->dbg_flag) { 1369 Mat coarse_sub_mat; 1370 Mat AUXMAT,TM1,TM2,TM3,TM4; 1371 Mat coarse_phi_D,coarse_phi_B; 1372 Mat coarse_psi_D,coarse_psi_B; 1373 Mat A_II,A_BB,A_IB,A_BI; 1374 Mat C_B,CPHI; 1375 IS is_dummy; 1376 Vec mones; 1377 MatType checkmattype=MATSEQAIJ; 1378 PetscReal real_value; 1379 1380 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1381 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1382 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1383 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1384 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1385 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1386 if (unsymmetric_check) { 1387 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1388 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1389 } 1390 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1391 1392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 if (unsymmetric_check) { 1396 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1397 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1398 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1399 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1400 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1401 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1402 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1403 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1404 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1405 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1406 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1407 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1408 } else { 1409 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1410 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1411 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1412 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1413 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1414 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1415 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1416 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1417 } 1418 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1419 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1420 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1421 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1422 if (n_benign) { 1423 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1424 PetscScalar *data,*data2; 1425 1426 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1427 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1428 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1429 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1430 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1431 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1432 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1433 for (i=0;i<pcbddc->local_primal_size;i++) { 1434 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1435 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1436 } 1437 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1438 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1439 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1440 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1441 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1442 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1443 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1444 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1445 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1446 } 1447 #if 0 1448 { 1449 PetscViewer viewer; 1450 char filename[256]; 1451 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1452 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1453 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1454 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1455 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1456 } 1457 #endif 1458 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1459 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1462 1463 /* check constraints */ 1464 if (!n_benign) { 1465 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1466 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1467 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1468 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1469 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1470 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1471 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1473 if (unsymmetric_check) { 1474 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1475 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1476 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1477 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1479 } 1480 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1481 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1482 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1483 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1484 } 1485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1486 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1487 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1488 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1489 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1490 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1491 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1492 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1493 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1494 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1495 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1496 if (unsymmetric_check) { 1497 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1498 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1499 } 1500 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1501 } 1502 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1503 /* get back data */ 1504 *coarse_submat_vals_n = coarse_submat_vals; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1510 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1511 { 1512 Mat *work_mat; 1513 IS isrow_s,iscol_s; 1514 PetscBool rsorted,csorted; 1515 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1516 PetscErrorCode ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1520 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1522 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1523 1524 if (!rsorted) { 1525 const PetscInt *idxs; 1526 PetscInt *idxs_sorted,i; 1527 1528 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1529 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1530 for (i=0;i<rsize;i++) { 1531 idxs_perm_r[i] = i; 1532 } 1533 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1534 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1535 for (i=0;i<rsize;i++) { 1536 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1537 } 1538 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1539 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1540 } else { 1541 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1542 isrow_s = isrow; 1543 } 1544 1545 if (!csorted) { 1546 if (isrow == iscol) { 1547 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1548 iscol_s = isrow_s; 1549 } else { 1550 const PetscInt *idxs; 1551 PetscInt *idxs_sorted,i; 1552 1553 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1554 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1555 for (i=0;i<csize;i++) { 1556 idxs_perm_c[i] = i; 1557 } 1558 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1559 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1560 for (i=0;i<csize;i++) { 1561 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1562 } 1563 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1564 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1565 } 1566 } else { 1567 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1568 iscol_s = iscol; 1569 } 1570 1571 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1572 1573 if (!rsorted || !csorted) { 1574 Mat new_mat; 1575 IS is_perm_r,is_perm_c; 1576 1577 if (!rsorted) { 1578 PetscInt *idxs_r,i; 1579 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1580 for (i=0;i<rsize;i++) { 1581 idxs_r[idxs_perm_r[i]] = i; 1582 } 1583 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1584 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1585 } else { 1586 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1587 } 1588 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1589 1590 if (!csorted) { 1591 if (isrow_s == iscol_s) { 1592 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1593 is_perm_c = is_perm_r; 1594 } else { 1595 PetscInt *idxs_c,i; 1596 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1597 for (i=0;i<csize;i++) { 1598 idxs_c[idxs_perm_c[i]] = i; 1599 } 1600 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1601 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1602 } 1603 } else { 1604 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1605 } 1606 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1607 1608 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1609 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1610 work_mat[0] = new_mat; 1611 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1612 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1613 } 1614 1615 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1616 *B = work_mat[0]; 1617 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1618 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1619 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1625 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1626 { 1627 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1629 Mat new_mat; 1630 IS is_local,is_global; 1631 PetscInt local_size; 1632 PetscBool isseqaij; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1637 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1638 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1639 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1640 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1641 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1642 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1643 1644 /* check */ 1645 if (pcbddc->dbg_flag) { 1646 Vec x,x_change; 1647 PetscReal error; 1648 1649 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1650 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1651 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1652 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1658 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1661 ierr = VecDestroy(&x);CHKERRQ(ierr); 1662 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1663 } 1664 1665 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1666 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1667 if (isseqaij) { 1668 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1669 } else { 1670 Mat work_mat; 1671 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1672 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1673 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1674 } 1675 if (matis->A->symmetric_set) { 1676 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1677 #if !defined(PETSC_USE_COMPLEX) 1678 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1679 #endif 1680 } 1681 /* 1682 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1683 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1684 */ 1685 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1691 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1692 { 1693 PC_IS* pcis = (PC_IS*)(pc->data); 1694 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1695 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1696 PetscInt *idx_R_local=NULL; 1697 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1698 PetscInt vbs,bs; 1699 PetscBT bitmask=NULL; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 /* 1704 No need to setup local scatters if 1705 - primal space is unchanged 1706 AND 1707 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1708 AND 1709 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1710 */ 1711 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1712 PetscFunctionReturn(0); 1713 } 1714 /* destroy old objects */ 1715 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1716 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1717 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1718 /* Set Non-overlapping dimensions */ 1719 n_B = pcis->n_B; 1720 n_D = pcis->n - n_B; 1721 n_vertices = pcbddc->n_vertices; 1722 1723 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1724 1725 /* create auxiliary bitmask and allocate workspace */ 1726 if (!sub_schurs->reuse_mumps) { 1727 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1728 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1729 for (i=0;i<n_vertices;i++) { 1730 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1731 } 1732 1733 for (i=0, n_R=0; i<pcis->n; i++) { 1734 if (!PetscBTLookup(bitmask,i)) { 1735 idx_R_local[n_R++] = i; 1736 } 1737 } 1738 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1742 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1743 } 1744 1745 /* Block code */ 1746 vbs = 1; 1747 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1748 if (bs>1 && !(n_vertices%bs)) { 1749 PetscBool is_blocked = PETSC_TRUE; 1750 PetscInt *vary; 1751 if (!sub_schurs->reuse_mumps) { 1752 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1753 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1754 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1755 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1756 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1757 for (i=0; i<pcis->n/bs; i++) { 1758 if (vary[i]!=0 && vary[i]!=bs) { 1759 is_blocked = PETSC_FALSE; 1760 break; 1761 } 1762 } 1763 ierr = PetscFree(vary);CHKERRQ(ierr); 1764 } else { 1765 /* Verify directly the R set */ 1766 for (i=0; i<n_R/bs; i++) { 1767 PetscInt j,node=idx_R_local[bs*i]; 1768 for (j=1; j<bs; j++) { 1769 if (node != idx_R_local[bs*i+j]-j) { 1770 is_blocked = PETSC_FALSE; 1771 break; 1772 } 1773 } 1774 } 1775 } 1776 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1777 vbs = bs; 1778 for (i=0;i<n_R/vbs;i++) { 1779 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1780 } 1781 } 1782 } 1783 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1784 if (sub_schurs->reuse_mumps) { 1785 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1786 1787 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1788 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1789 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1790 reuse_mumps->is_R = pcbddc->is_R_local; 1791 } else { 1792 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1793 } 1794 1795 /* print some info if requested */ 1796 if (pcbddc->dbg_flag) { 1797 PetscInt benign = 0; 1798 1799 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1800 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1801 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1806 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1807 } 1808 1809 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1810 if (!sub_schurs->reuse_mumps) { 1811 IS is_aux1,is_aux2; 1812 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1813 1814 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1816 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1818 for (i=0; i<n_D; i++) { 1819 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1820 } 1821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1822 for (i=0, j=0; i<n_R; i++) { 1823 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1824 aux_array1[j++] = i; 1825 } 1826 } 1827 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1828 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1829 for (i=0, j=0; i<n_B; i++) { 1830 if (!PetscBTLookup(bitmask,is_indices[i])) { 1831 aux_array2[j++] = i; 1832 } 1833 } 1834 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1835 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1836 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1837 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1838 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1839 1840 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1841 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1842 for (i=0, j=0; i<n_R; i++) { 1843 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1844 aux_array1[j++] = i; 1845 } 1846 } 1847 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1848 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1849 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1850 } 1851 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1852 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1853 } else { 1854 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1855 IS tis; 1856 PetscInt schur_size; 1857 1858 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1859 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1860 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1861 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1862 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1863 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1864 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1865 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1874 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1875 { 1876 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1877 PC_IS *pcis = (PC_IS*)pc->data; 1878 PC pc_temp; 1879 Mat A_RR; 1880 MatReuse reuse; 1881 PetscScalar m_one = -1.0; 1882 PetscReal value; 1883 PetscInt n_D,n_R; 1884 PetscBool use_exact,use_exact_reduced,issbaij; 1885 PetscErrorCode ierr; 1886 /* prefixes stuff */ 1887 char dir_prefix[256],neu_prefix[256],str_level[16]; 1888 size_t len; 1889 1890 PetscFunctionBegin; 1891 1892 /* compute prefixes */ 1893 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1894 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1895 if (!pcbddc->current_level) { 1896 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1897 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1898 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1899 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1900 } else { 1901 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1902 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1903 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1904 len -= 15; /* remove "pc_bddc_coarse_" */ 1905 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1906 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1907 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1908 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1909 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1910 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1911 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1912 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1913 } 1914 1915 /* DIRICHLET PROBLEM */ 1916 if (dirichlet) { 1917 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1918 if (pcbddc->local_mat->symmetric_set) { 1919 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1920 } 1921 /* Matrix for Dirichlet problem is pcis->A_II */ 1922 n_D = pcis->n - pcis->n_B; 1923 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1924 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1925 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1926 /* default */ 1927 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1928 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1929 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1930 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1931 if (issbaij) { 1932 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1933 } else { 1934 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1935 } 1936 /* Allow user's customization */ 1937 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1938 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1939 } 1940 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1941 if (sub_schurs->reuse_mumps) { 1942 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1943 1944 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1945 } 1946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1947 if (!n_D) { 1948 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1950 } 1951 /* Set Up KSP for Dirichlet problem of BDDC */ 1952 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1953 /* set ksp_D into pcis data */ 1954 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1955 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1956 pcis->ksp_D = pcbddc->ksp_D; 1957 } 1958 1959 /* NEUMANN PROBLEM */ 1960 A_RR = 0; 1961 if (neumann) { 1962 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1963 PetscInt ibs,mbs; 1964 PetscBool issbaij; 1965 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1966 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1967 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1968 if (pcbddc->ksp_R) { /* already created ksp */ 1969 PetscInt nn_R; 1970 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1971 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1972 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1973 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1974 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1975 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1976 reuse = MAT_INITIAL_MATRIX; 1977 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1978 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1979 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1980 reuse = MAT_INITIAL_MATRIX; 1981 } else { /* safe to reuse the matrix */ 1982 reuse = MAT_REUSE_MATRIX; 1983 } 1984 } 1985 /* last check */ 1986 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1987 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1988 reuse = MAT_INITIAL_MATRIX; 1989 } 1990 } else { /* first time, so we need to create the matrix */ 1991 reuse = MAT_INITIAL_MATRIX; 1992 } 1993 /* extract A_RR */ 1994 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1995 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1996 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1997 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1998 if (matis->A == pcbddc->local_mat) { 1999 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2000 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2003 } 2004 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2005 if (matis->A == pcbddc->local_mat) { 2006 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2007 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2008 } else { 2009 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2010 } 2011 } 2012 if (!sub_schurs->reuse_mumps) { 2013 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2014 if (pcbddc->local_mat->symmetric_set) { 2015 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2016 } 2017 } else { 2018 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2019 2020 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2021 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2022 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2023 } 2024 if (!pcbddc->ksp_R) { /* create object if not present */ 2025 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2026 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2027 /* default */ 2028 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2029 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2030 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2031 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2032 if (issbaij) { 2033 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2034 } else { 2035 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2036 } 2037 /* Allow user's customization */ 2038 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2039 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2040 } 2041 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2042 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2043 if (!n_R) { 2044 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2045 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2046 } 2047 /* Reuse MUMPS solver if it is present */ 2048 if (sub_schurs->reuse_mumps) { 2049 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2050 2051 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2052 } 2053 /* Set Up KSP for Neumann problem of BDDC */ 2054 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2055 } 2056 /* free Neumann problem's matrix */ 2057 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2058 2059 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2060 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2061 if (pcbddc->dbg_flag) { 2062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2063 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2065 } 2066 if (dirichlet) { /* Dirichlet */ 2067 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2068 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2069 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2070 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2071 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2072 /* need to be adapted? */ 2073 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2074 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2075 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2076 /* print info */ 2077 if (pcbddc->dbg_flag) { 2078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2080 } 2081 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2082 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2083 } 2084 } 2085 if (neumann) { /* Neumann */ 2086 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2087 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2088 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2089 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2090 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2091 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2092 /* need to be adapted? */ 2093 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2094 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2095 /* print info */ 2096 if (pcbddc->dbg_flag) { 2097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2099 } 2100 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2101 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2102 } 2103 } 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2110 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2111 { 2112 PetscErrorCode ierr; 2113 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2114 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2115 2116 PetscFunctionBegin; 2117 if (!sub_schurs->reuse_mumps) { 2118 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2119 } 2120 if (!pcbddc->switch_static) { 2121 if (applytranspose && pcbddc->local_auxmat1) { 2122 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2123 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2124 } 2125 if (!sub_schurs->reuse_mumps) { 2126 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2127 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2128 } else { 2129 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2130 2131 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 } 2134 } else { 2135 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2136 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2137 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2138 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2139 if (applytranspose && pcbddc->local_auxmat1) { 2140 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2141 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2142 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2143 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2144 } 2145 } 2146 if (!sub_schurs->reuse_mumps) { 2147 if (applytranspose) { 2148 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2149 } else { 2150 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2151 } 2152 #if defined(PETSC_HAVE_MUMPS) 2153 } else { 2154 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2155 2156 if (applytranspose) { 2157 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2158 } else { 2159 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2160 } 2161 #endif 2162 } 2163 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2164 if (!pcbddc->switch_static) { 2165 if (!sub_schurs->reuse_mumps) { 2166 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2167 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 } else { 2169 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2170 2171 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2172 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2173 } 2174 if (!applytranspose && pcbddc->local_auxmat1) { 2175 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2176 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2177 } 2178 } else { 2179 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2181 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 if (!applytranspose && pcbddc->local_auxmat1) { 2184 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2185 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2186 } 2187 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2188 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2189 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2190 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2198 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2199 { 2200 PetscErrorCode ierr; 2201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2202 PC_IS* pcis = (PC_IS*) (pc->data); 2203 const PetscScalar zero = 0.0; 2204 2205 PetscFunctionBegin; 2206 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2207 if (applytranspose) { 2208 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2209 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2210 } else { 2211 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2212 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2213 } 2214 2215 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2216 if (pcbddc->benign_p0_lidx >= 0) { 2217 PetscScalar *array; 2218 2219 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2220 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2221 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2222 } 2223 2224 /* start communications from local primal nodes to rhs of coarse solver */ 2225 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2226 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2227 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2228 2229 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2230 /* TODO remove null space when doing multilevel */ 2231 if (pcbddc->coarse_ksp) { 2232 Mat coarse_mat; 2233 Vec rhs,sol; 2234 MatNullSpace nullsp; 2235 2236 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2237 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2238 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2239 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2240 if (nullsp) { 2241 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2242 } 2243 if (applytranspose) { 2244 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2245 } else { 2246 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2247 } 2248 if (nullsp) { 2249 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2250 } 2251 } 2252 2253 /* Local solution on R nodes */ 2254 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2255 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2256 } 2257 2258 /* communications from coarse sol to local primal nodes */ 2259 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2260 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2261 2262 /* Sum contributions from two levels */ 2263 if (applytranspose) { 2264 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2265 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2266 } else { 2267 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2268 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2269 } 2270 /* store p0 */ 2271 if (pcbddc->benign_p0_lidx >= 0) { 2272 PetscScalar *array; 2273 2274 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2275 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2276 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2277 } 2278 PetscFunctionReturn(0); 2279 } 2280 2281 #undef __FUNCT__ 2282 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2283 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2284 { 2285 PetscErrorCode ierr; 2286 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2287 PetscScalar *array; 2288 Vec from,to; 2289 2290 PetscFunctionBegin; 2291 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2292 from = pcbddc->coarse_vec; 2293 to = pcbddc->vec1_P; 2294 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2295 Vec tvec; 2296 2297 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2298 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2299 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2300 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2301 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2302 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2303 } 2304 } else { /* from local to global -> put data in coarse right hand side */ 2305 from = pcbddc->vec1_P; 2306 to = pcbddc->coarse_vec; 2307 } 2308 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2314 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2315 { 2316 PetscErrorCode ierr; 2317 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2318 PetscScalar *array; 2319 Vec from,to; 2320 2321 PetscFunctionBegin; 2322 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2323 from = pcbddc->coarse_vec; 2324 to = pcbddc->vec1_P; 2325 } else { /* from local to global -> put data in coarse right hand side */ 2326 from = pcbddc->vec1_P; 2327 to = pcbddc->coarse_vec; 2328 } 2329 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2330 if (smode == SCATTER_FORWARD) { 2331 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2332 Vec tvec; 2333 2334 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2335 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2336 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2337 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2338 } 2339 } else { 2340 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2341 ierr = VecResetArray(from);CHKERRQ(ierr); 2342 } 2343 } 2344 PetscFunctionReturn(0); 2345 } 2346 2347 /* uncomment for testing purposes */ 2348 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2351 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2352 { 2353 PetscErrorCode ierr; 2354 PC_IS* pcis = (PC_IS*)(pc->data); 2355 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2356 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2357 /* one and zero */ 2358 PetscScalar one=1.0,zero=0.0; 2359 /* space to store constraints and their local indices */ 2360 PetscScalar *constraints_data; 2361 PetscInt *constraints_idxs,*constraints_idxs_B; 2362 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2363 PetscInt *constraints_n; 2364 /* iterators */ 2365 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2366 /* BLAS integers */ 2367 PetscBLASInt lwork,lierr; 2368 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2369 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2370 /* reuse */ 2371 PetscInt olocal_primal_size,olocal_primal_size_cc; 2372 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2373 /* change of basis */ 2374 PetscBool qr_needed; 2375 PetscBT change_basis,qr_needed_idx; 2376 /* auxiliary stuff */ 2377 PetscInt *nnz,*is_indices; 2378 PetscInt ncc; 2379 /* some quantities */ 2380 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2381 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2382 2383 PetscFunctionBegin; 2384 /* Destroy Mat objects computed previously */ 2385 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2386 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2387 /* save info on constraints from previous setup (if any) */ 2388 olocal_primal_size = pcbddc->local_primal_size; 2389 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2390 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2391 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2392 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2393 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2394 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2395 2396 /* print some info */ 2397 if (pcbddc->dbg_flag) { 2398 IS vertices; 2399 PetscInt nv,nedges,nfaces; 2400 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2401 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2402 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2403 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2409 } 2410 2411 if (!pcbddc->adaptive_selection) { 2412 IS ISForVertices,*ISForFaces,*ISForEdges; 2413 MatNullSpace nearnullsp; 2414 const Vec *nearnullvecs; 2415 Vec *localnearnullsp; 2416 PetscScalar *array; 2417 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2418 PetscBool nnsp_has_cnst; 2419 /* LAPACK working arrays for SVD or POD */ 2420 PetscBool skip_lapack,boolforchange; 2421 PetscScalar *work; 2422 PetscReal *singular_vals; 2423 #if defined(PETSC_USE_COMPLEX) 2424 PetscReal *rwork; 2425 #endif 2426 #if defined(PETSC_MISSING_LAPACK_GESVD) 2427 PetscScalar *temp_basis,*correlation_mat; 2428 #else 2429 PetscBLASInt dummy_int=1; 2430 PetscScalar dummy_scalar=1.; 2431 #endif 2432 2433 /* Get index sets for faces, edges and vertices from graph */ 2434 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2435 /* free unneeded index sets */ 2436 if (!pcbddc->use_vertices) { 2437 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2438 } 2439 if (!pcbddc->use_edges) { 2440 for (i=0;i<n_ISForEdges;i++) { 2441 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2442 } 2443 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2444 n_ISForEdges = 0; 2445 } 2446 if (!pcbddc->use_faces) { 2447 for (i=0;i<n_ISForFaces;i++) { 2448 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2449 } 2450 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2451 n_ISForFaces = 0; 2452 } 2453 2454 #if defined(PETSC_USE_DEBUG) 2455 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2456 Also use_change_of_basis should be consistent among processors */ 2457 if (pcbddc->NullSpace) { 2458 PetscBool tbool[2],gbool[2]; 2459 2460 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2461 pcbddc->use_change_of_basis = PETSC_TRUE; 2462 if (!ISForEdges) { 2463 pcbddc->use_change_on_faces = PETSC_TRUE; 2464 } 2465 } 2466 tbool[0] = pcbddc->use_change_of_basis; 2467 tbool[1] = pcbddc->use_change_on_faces; 2468 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2469 pcbddc->use_change_of_basis = gbool[0]; 2470 pcbddc->use_change_on_faces = gbool[1]; 2471 } 2472 #endif 2473 2474 /* check if near null space is attached to global mat */ 2475 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2476 if (nearnullsp) { 2477 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2478 /* remove any stored info */ 2479 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2480 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2481 /* store information for BDDC solver reuse */ 2482 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2483 pcbddc->onearnullspace = nearnullsp; 2484 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2485 for (i=0;i<nnsp_size;i++) { 2486 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2487 } 2488 } else { /* if near null space is not provided BDDC uses constants by default */ 2489 nnsp_size = 0; 2490 nnsp_has_cnst = PETSC_TRUE; 2491 } 2492 /* get max number of constraints on a single cc */ 2493 max_constraints = nnsp_size; 2494 if (nnsp_has_cnst) max_constraints++; 2495 2496 /* 2497 Evaluate maximum storage size needed by the procedure 2498 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2499 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2500 There can be multiple constraints per connected component 2501 */ 2502 n_vertices = 0; 2503 if (ISForVertices) { 2504 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2505 } 2506 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2507 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2508 2509 total_counts = n_ISForFaces+n_ISForEdges; 2510 total_counts *= max_constraints; 2511 total_counts += n_vertices; 2512 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2513 2514 total_counts = 0; 2515 max_size_of_constraint = 0; 2516 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2517 IS used_is; 2518 if (i<n_ISForEdges) { 2519 used_is = ISForEdges[i]; 2520 } else { 2521 used_is = ISForFaces[i-n_ISForEdges]; 2522 } 2523 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2524 total_counts += j; 2525 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2526 } 2527 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2528 2529 /* get local part of global near null space vectors */ 2530 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2531 for (k=0;k<nnsp_size;k++) { 2532 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2533 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2534 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2535 } 2536 2537 /* whether or not to skip lapack calls */ 2538 skip_lapack = PETSC_TRUE; 2539 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2540 2541 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2542 if (!skip_lapack) { 2543 PetscScalar temp_work; 2544 2545 #if defined(PETSC_MISSING_LAPACK_GESVD) 2546 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2547 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2550 #if defined(PETSC_USE_COMPLEX) 2551 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2552 #endif 2553 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2554 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2556 lwork = -1; 2557 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2558 #if !defined(PETSC_USE_COMPLEX) 2559 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2560 #else 2561 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2562 #endif 2563 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2564 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2565 #else /* on missing GESVD */ 2566 /* SVD */ 2567 PetscInt max_n,min_n; 2568 max_n = max_size_of_constraint; 2569 min_n = max_constraints; 2570 if (max_size_of_constraint < max_constraints) { 2571 min_n = max_size_of_constraint; 2572 max_n = max_constraints; 2573 } 2574 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2575 #if defined(PETSC_USE_COMPLEX) 2576 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2577 #endif 2578 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2579 lwork = -1; 2580 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2582 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2583 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2584 #if !defined(PETSC_USE_COMPLEX) 2585 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2586 #else 2587 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2588 #endif 2589 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2590 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2591 #endif /* on missing GESVD */ 2592 /* Allocate optimal workspace */ 2593 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2595 } 2596 /* Now we can loop on constraining sets */ 2597 total_counts = 0; 2598 constraints_idxs_ptr[0] = 0; 2599 constraints_data_ptr[0] = 0; 2600 /* vertices */ 2601 if (n_vertices) { 2602 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2603 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2604 for (i=0;i<n_vertices;i++) { 2605 constraints_n[total_counts] = 1; 2606 constraints_data[total_counts] = 1.0; 2607 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2608 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2609 total_counts++; 2610 } 2611 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2612 n_vertices = total_counts; 2613 } 2614 2615 /* edges and faces */ 2616 total_counts_cc = total_counts; 2617 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2618 IS used_is; 2619 PetscBool idxs_copied = PETSC_FALSE; 2620 2621 if (ncc<n_ISForEdges) { 2622 used_is = ISForEdges[ncc]; 2623 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2624 } else { 2625 used_is = ISForFaces[ncc-n_ISForEdges]; 2626 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2627 } 2628 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2629 2630 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2631 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2632 /* change of basis should not be performed on local periodic nodes */ 2633 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2634 if (nnsp_has_cnst) { 2635 PetscScalar quad_value; 2636 2637 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2638 idxs_copied = PETSC_TRUE; 2639 2640 if (!pcbddc->use_nnsp_true) { 2641 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2642 } else { 2643 quad_value = 1.0; 2644 } 2645 for (j=0;j<size_of_constraint;j++) { 2646 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2647 } 2648 temp_constraints++; 2649 total_counts++; 2650 } 2651 for (k=0;k<nnsp_size;k++) { 2652 PetscReal real_value; 2653 PetscScalar *ptr_to_data; 2654 2655 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2656 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2657 for (j=0;j<size_of_constraint;j++) { 2658 ptr_to_data[j] = array[is_indices[j]]; 2659 } 2660 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2661 /* check if array is null on the connected component */ 2662 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2663 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2664 if (real_value > 0.0) { /* keep indices and values */ 2665 temp_constraints++; 2666 total_counts++; 2667 if (!idxs_copied) { 2668 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2669 idxs_copied = PETSC_TRUE; 2670 } 2671 } 2672 } 2673 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2674 valid_constraints = temp_constraints; 2675 if (!pcbddc->use_nnsp_true && temp_constraints) { 2676 if (temp_constraints == 1) { /* just normalize the constraint */ 2677 PetscScalar norm,*ptr_to_data; 2678 2679 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2681 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2682 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2683 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2684 } else { /* perform SVD */ 2685 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2686 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2687 2688 #if defined(PETSC_MISSING_LAPACK_GESVD) 2689 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2690 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2691 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2692 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2693 from that computed using LAPACKgesvd 2694 -> This is due to a different computation of eigenvectors in LAPACKheev 2695 -> The quality of the POD-computed basis will be the same */ 2696 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2697 /* Store upper triangular part of correlation matrix */ 2698 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2699 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2700 for (j=0;j<temp_constraints;j++) { 2701 for (k=0;k<j+1;k++) { 2702 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2703 } 2704 } 2705 /* compute eigenvalues and eigenvectors of correlation matrix */ 2706 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2707 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2708 #if !defined(PETSC_USE_COMPLEX) 2709 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2710 #else 2711 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2712 #endif 2713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2715 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2716 j = 0; 2717 while (j < temp_constraints && singular_vals[j] < tol) j++; 2718 total_counts = total_counts-j; 2719 valid_constraints = temp_constraints-j; 2720 /* scale and copy POD basis into used quadrature memory */ 2721 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2723 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2724 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2725 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2726 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2727 if (j<temp_constraints) { 2728 PetscInt ii; 2729 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2730 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2731 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2732 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2733 for (k=0;k<temp_constraints-j;k++) { 2734 for (ii=0;ii<size_of_constraint;ii++) { 2735 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2736 } 2737 } 2738 } 2739 #else /* on missing GESVD */ 2740 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2741 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2743 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2744 #if !defined(PETSC_USE_COMPLEX) 2745 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2746 #else 2747 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2748 #endif 2749 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2750 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2751 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2752 k = temp_constraints; 2753 if (k > size_of_constraint) k = size_of_constraint; 2754 j = 0; 2755 while (j < k && singular_vals[k-j-1] < tol) j++; 2756 valid_constraints = k-j; 2757 total_counts = total_counts-temp_constraints+valid_constraints; 2758 #endif /* on missing GESVD */ 2759 } 2760 } 2761 /* update pointers information */ 2762 if (valid_constraints) { 2763 constraints_n[total_counts_cc] = valid_constraints; 2764 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2765 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2766 /* set change_of_basis flag */ 2767 if (boolforchange) { 2768 PetscBTSet(change_basis,total_counts_cc); 2769 } 2770 total_counts_cc++; 2771 } 2772 } 2773 /* free workspace */ 2774 if (!skip_lapack) { 2775 ierr = PetscFree(work);CHKERRQ(ierr); 2776 #if defined(PETSC_USE_COMPLEX) 2777 ierr = PetscFree(rwork);CHKERRQ(ierr); 2778 #endif 2779 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2780 #if defined(PETSC_MISSING_LAPACK_GESVD) 2781 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2782 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2783 #endif 2784 } 2785 for (k=0;k<nnsp_size;k++) { 2786 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2787 } 2788 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2789 /* free index sets of faces, edges and vertices */ 2790 for (i=0;i<n_ISForFaces;i++) { 2791 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2792 } 2793 if (n_ISForFaces) { 2794 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2795 } 2796 for (i=0;i<n_ISForEdges;i++) { 2797 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2798 } 2799 if (n_ISForEdges) { 2800 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2801 } 2802 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2803 } else { 2804 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2805 2806 total_counts = 0; 2807 n_vertices = 0; 2808 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2809 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2810 } 2811 max_constraints = 0; 2812 total_counts_cc = 0; 2813 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2814 total_counts += pcbddc->adaptive_constraints_n[i]; 2815 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2816 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2817 } 2818 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2819 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2820 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2821 constraints_data = pcbddc->adaptive_constraints_data; 2822 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2823 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2824 total_counts_cc = 0; 2825 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2826 if (pcbddc->adaptive_constraints_n[i]) { 2827 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2828 } 2829 } 2830 #if 0 2831 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2832 for (i=0;i<total_counts_cc;i++) { 2833 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2834 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2835 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2836 printf(" %d",constraints_idxs[j]); 2837 } 2838 printf("\n"); 2839 printf("number of cc: %d\n",constraints_n[i]); 2840 } 2841 for (i=0;i<n_vertices;i++) { 2842 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2843 } 2844 for (i=0;i<sub_schurs->n_subs;i++) { 2845 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2846 } 2847 #endif 2848 2849 max_size_of_constraint = 0; 2850 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2851 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2852 /* Change of basis */ 2853 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2854 if (pcbddc->use_change_of_basis) { 2855 for (i=0;i<sub_schurs->n_subs;i++) { 2856 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2857 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2858 } 2859 } 2860 } 2861 } 2862 pcbddc->local_primal_size = total_counts; 2863 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2864 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2865 2866 /* map constraints_idxs in boundary numbering */ 2867 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2868 if (i != constraints_idxs_ptr[total_counts_cc]) { 2869 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2870 } 2871 2872 /* Create constraint matrix */ 2873 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2874 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2875 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2876 2877 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2878 /* determine if a QR strategy is needed for change of basis */ 2879 qr_needed = PETSC_FALSE; 2880 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2881 total_primal_vertices=0; 2882 pcbddc->local_primal_size_cc = 0; 2883 for (i=0;i<total_counts_cc;i++) { 2884 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2885 if (size_of_constraint == 1) { 2886 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2887 pcbddc->local_primal_size_cc += 1; 2888 } else if (PetscBTLookup(change_basis,i)) { 2889 for (k=0;k<constraints_n[i];k++) { 2890 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2891 } 2892 pcbddc->local_primal_size_cc += constraints_n[i]; 2893 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2894 PetscBTSet(qr_needed_idx,i); 2895 qr_needed = PETSC_TRUE; 2896 } 2897 } else { 2898 pcbddc->local_primal_size_cc += 1; 2899 } 2900 } 2901 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2902 pcbddc->n_vertices = total_primal_vertices; 2903 /* permute indices in order to have a sorted set of vertices */ 2904 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2905 2906 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2907 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2908 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2909 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2910 2911 /* nonzero structure of constraint matrix */ 2912 /* and get reference dof for local constraints */ 2913 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2914 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2915 2916 j = total_primal_vertices; 2917 total_counts = total_primal_vertices; 2918 cum = total_primal_vertices; 2919 for (i=n_vertices;i<total_counts_cc;i++) { 2920 if (!PetscBTLookup(change_basis,i)) { 2921 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2922 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2923 cum++; 2924 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2925 for (k=0;k<constraints_n[i];k++) { 2926 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2927 nnz[j+k] = size_of_constraint; 2928 } 2929 j += constraints_n[i]; 2930 } 2931 } 2932 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2933 ierr = PetscFree(nnz);CHKERRQ(ierr); 2934 2935 /* set values in constraint matrix */ 2936 for (i=0;i<total_primal_vertices;i++) { 2937 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2938 } 2939 total_counts = total_primal_vertices; 2940 for (i=n_vertices;i<total_counts_cc;i++) { 2941 if (!PetscBTLookup(change_basis,i)) { 2942 PetscInt *cols; 2943 2944 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2945 cols = constraints_idxs+constraints_idxs_ptr[i]; 2946 for (k=0;k<constraints_n[i];k++) { 2947 PetscInt row = total_counts+k; 2948 PetscScalar *vals; 2949 2950 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2951 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2952 } 2953 total_counts += constraints_n[i]; 2954 } 2955 } 2956 /* assembling */ 2957 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2958 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 2960 /* 2961 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2962 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2963 */ 2964 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2965 if (pcbddc->use_change_of_basis) { 2966 /* dual and primal dofs on a single cc */ 2967 PetscInt dual_dofs,primal_dofs; 2968 /* working stuff for GEQRF */ 2969 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2970 PetscBLASInt lqr_work; 2971 /* working stuff for UNGQR */ 2972 PetscScalar *gqr_work,lgqr_work_t; 2973 PetscBLASInt lgqr_work; 2974 /* working stuff for TRTRS */ 2975 PetscScalar *trs_rhs; 2976 PetscBLASInt Blas_NRHS; 2977 /* pointers for values insertion into change of basis matrix */ 2978 PetscInt *start_rows,*start_cols; 2979 PetscScalar *start_vals; 2980 /* working stuff for values insertion */ 2981 PetscBT is_primal; 2982 PetscInt *aux_primal_numbering_B; 2983 /* matrix sizes */ 2984 PetscInt global_size,local_size; 2985 /* temporary change of basis */ 2986 Mat localChangeOfBasisMatrix; 2987 /* extra space for debugging */ 2988 PetscScalar *dbg_work; 2989 2990 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2991 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2992 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2993 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2994 /* nonzeros for local mat */ 2995 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2996 for (i=0;i<pcis->n;i++) nnz[i]=1; 2997 for (i=n_vertices;i<total_counts_cc;i++) { 2998 if (PetscBTLookup(change_basis,i)) { 2999 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3000 if (PetscBTLookup(qr_needed_idx,i)) { 3001 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3002 } else { 3003 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3004 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3005 } 3006 } 3007 } 3008 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3009 ierr = PetscFree(nnz);CHKERRQ(ierr); 3010 /* Set initial identity in the matrix */ 3011 for (i=0;i<pcis->n;i++) { 3012 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3013 } 3014 3015 if (pcbddc->dbg_flag) { 3016 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3017 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3018 } 3019 3020 3021 /* Now we loop on the constraints which need a change of basis */ 3022 /* 3023 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3024 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3025 3026 Basic blocks of change of basis matrix T computed by 3027 3028 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3029 3030 | 1 0 ... 0 s_1/S | 3031 | 0 1 ... 0 s_2/S | 3032 | ... | 3033 | 0 ... 1 s_{n-1}/S | 3034 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3035 3036 with S = \sum_{i=1}^n s_i^2 3037 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3038 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3039 3040 - QR decomposition of constraints otherwise 3041 */ 3042 if (qr_needed) { 3043 /* space to store Q */ 3044 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3045 /* first we issue queries for optimal work */ 3046 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3047 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3048 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3049 lqr_work = -1; 3050 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3051 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3052 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3053 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3054 lgqr_work = -1; 3055 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3056 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3057 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3058 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3059 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3060 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3061 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3062 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3063 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3064 /* array to store scaling factors for reflectors */ 3065 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3066 /* array to store rhs and solution of triangular solver */ 3067 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3068 /* allocating workspace for check */ 3069 if (pcbddc->dbg_flag) { 3070 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3071 } 3072 } 3073 /* array to store whether a node is primal or not */ 3074 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3075 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3076 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3077 if (i != total_primal_vertices) { 3078 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3079 } 3080 for (i=0;i<total_primal_vertices;i++) { 3081 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3082 } 3083 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3084 3085 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3086 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3087 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3088 if (PetscBTLookup(change_basis,total_counts)) { 3089 /* get constraint info */ 3090 primal_dofs = constraints_n[total_counts]; 3091 dual_dofs = size_of_constraint-primal_dofs; 3092 3093 if (pcbddc->dbg_flag) { 3094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3095 } 3096 3097 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3098 3099 /* copy quadrature constraints for change of basis check */ 3100 if (pcbddc->dbg_flag) { 3101 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3102 } 3103 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3104 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3105 3106 /* compute QR decomposition of constraints */ 3107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3108 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3110 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3111 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3112 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3113 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3114 3115 /* explictly compute R^-T */ 3116 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3117 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3118 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3119 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3121 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3123 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3124 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3125 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3126 3127 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3129 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3130 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3131 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3132 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3133 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3134 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3135 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3136 3137 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3138 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3139 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3140 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3141 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3142 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3143 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3144 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3145 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3146 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3147 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3148 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3149 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3150 3151 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3152 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3153 /* insert cols for primal dofs */ 3154 for (j=0;j<primal_dofs;j++) { 3155 start_vals = &qr_basis[j*size_of_constraint]; 3156 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3157 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3158 } 3159 /* insert cols for dual dofs */ 3160 for (j=0,k=0;j<dual_dofs;k++) { 3161 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3162 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3163 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3164 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3165 j++; 3166 } 3167 } 3168 3169 /* check change of basis */ 3170 if (pcbddc->dbg_flag) { 3171 PetscInt ii,jj; 3172 PetscBool valid_qr=PETSC_TRUE; 3173 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3174 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3175 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3176 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3177 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3178 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3180 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3181 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3182 for (jj=0;jj<size_of_constraint;jj++) { 3183 for (ii=0;ii<primal_dofs;ii++) { 3184 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3185 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3186 } 3187 } 3188 if (!valid_qr) { 3189 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3190 for (jj=0;jj<size_of_constraint;jj++) { 3191 for (ii=0;ii<primal_dofs;ii++) { 3192 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3193 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3194 } 3195 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3196 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3197 } 3198 } 3199 } 3200 } else { 3201 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3202 } 3203 } 3204 } else { /* simple transformation block */ 3205 PetscInt row,col; 3206 PetscScalar val,norm; 3207 3208 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3209 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3210 for (j=0;j<size_of_constraint;j++) { 3211 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3212 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3213 if (!PetscBTLookup(is_primal,row_B)) { 3214 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3215 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3216 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3217 } else { 3218 for (k=0;k<size_of_constraint;k++) { 3219 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3220 if (row != col) { 3221 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3222 } else { 3223 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3224 } 3225 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3226 } 3227 } 3228 } 3229 if (pcbddc->dbg_flag) { 3230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3231 } 3232 } 3233 } else { 3234 if (pcbddc->dbg_flag) { 3235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3236 } 3237 } 3238 } 3239 3240 /* free workspace */ 3241 if (qr_needed) { 3242 if (pcbddc->dbg_flag) { 3243 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3244 } 3245 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3246 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3247 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3248 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3249 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3250 } 3251 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3252 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3254 3255 /* assembling of global change of variable */ 3256 { 3257 Mat tmat; 3258 PetscInt bs; 3259 3260 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3261 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3262 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3263 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3264 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3265 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3266 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3267 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3268 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3269 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3270 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3271 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3272 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3273 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3274 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3275 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3276 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3277 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3278 } 3279 /* check */ 3280 if (pcbddc->dbg_flag) { 3281 PetscReal error; 3282 Vec x,x_change; 3283 3284 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3285 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3286 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3287 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3288 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3289 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3290 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3291 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3292 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3293 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3294 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3295 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3297 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3298 ierr = VecDestroy(&x);CHKERRQ(ierr); 3299 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3300 } 3301 3302 /* adapt sub_schurs computed (if any) */ 3303 if (pcbddc->use_deluxe_scaling) { 3304 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3305 if (sub_schurs->S_Ej_all) { 3306 Mat S_new,tmat; 3307 ISLocalToGlobalMapping NtoSall; 3308 IS is_all_N,is_V,is_V_Sall; 3309 const PetscScalar *array; 3310 const PetscInt *idxs_V,*idxs_all; 3311 PetscInt i,n_V; 3312 3313 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3314 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3315 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3316 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3317 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3318 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3319 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3320 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3321 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3322 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3323 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3324 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3325 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3326 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3327 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3328 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3329 for (i=0;i<n_V;i++) { 3330 PetscScalar val; 3331 PetscInt idx; 3332 3333 idx = idxs_V[i]; 3334 val = array[idxs_all[idxs_V[i]]]; 3335 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3336 } 3337 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3338 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3339 sub_schurs->S_Ej_all = S_new; 3340 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3341 if (sub_schurs->sum_S_Ej_all) { 3342 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3343 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3344 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3345 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3346 sub_schurs->sum_S_Ej_all = S_new; 3347 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3348 } 3349 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3350 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3351 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3352 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3353 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3354 } 3355 } 3356 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3357 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3358 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3359 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3360 } 3361 3362 /* set up change of basis context */ 3363 if (pcbddc->ChangeOfBasisMatrix) { 3364 PCBDDCChange_ctx change_ctx; 3365 3366 if (!pcbddc->new_global_mat) { 3367 PetscInt global_size,local_size; 3368 3369 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3370 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3371 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3372 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3373 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3374 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3375 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3376 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3377 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3378 } else { 3379 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3380 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3381 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3382 } 3383 if (!pcbddc->user_ChangeOfBasisMatrix) { 3384 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3385 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3386 } else { 3387 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3388 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3389 } 3390 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3391 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3392 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3393 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3394 } 3395 3396 /* add pressure dof to set of primal nodes for numbering purposes */ 3397 if (pcbddc->benign_p0_lidx >= 0) { 3398 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3399 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3400 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3401 pcbddc->local_primal_size_cc++; 3402 pcbddc->local_primal_size++; 3403 } 3404 3405 /* check if a new primal space has been introduced (also take into account benign trick) */ 3406 pcbddc->new_primal_space_local = PETSC_TRUE; 3407 if (olocal_primal_size == pcbddc->local_primal_size) { 3408 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3409 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3410 if (!pcbddc->new_primal_space_local) { 3411 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3412 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3413 } 3414 } 3415 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3416 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3417 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3418 3419 /* flush dbg viewer */ 3420 if (pcbddc->dbg_flag) { 3421 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3422 } 3423 3424 /* free workspace */ 3425 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3426 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3427 if (!pcbddc->adaptive_selection) { 3428 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3429 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3430 } else { 3431 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3432 pcbddc->adaptive_constraints_idxs_ptr, 3433 pcbddc->adaptive_constraints_data_ptr, 3434 pcbddc->adaptive_constraints_idxs, 3435 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3436 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3437 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3438 } 3439 PetscFunctionReturn(0); 3440 } 3441 3442 #undef __FUNCT__ 3443 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3444 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3445 { 3446 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3447 PC_IS *pcis = (PC_IS*)pc->data; 3448 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3449 PetscInt ierr,i,vertex_size,N; 3450 PetscViewer viewer=pcbddc->dbg_viewer; 3451 3452 PetscFunctionBegin; 3453 /* Reset previously computed graph */ 3454 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3455 /* Init local Graph struct */ 3456 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3457 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3458 3459 /* Check validity of the csr graph passed in by the user */ 3460 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3461 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3462 } 3463 3464 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3465 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3466 PetscInt *xadj,*adjncy; 3467 PetscInt nvtxs; 3468 PetscBool flg_row=PETSC_FALSE; 3469 3470 if (pcbddc->use_local_adj) { 3471 3472 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3473 if (flg_row) { 3474 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3475 pcbddc->computed_rowadj = PETSC_TRUE; 3476 } 3477 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3478 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3479 IS is_dummy; 3480 ISLocalToGlobalMapping l2gmap_dummy; 3481 PetscInt j,sum; 3482 PetscInt *cxadj,*cadjncy; 3483 const PetscInt *idxs; 3484 PCBDDCGraph graph; 3485 PetscBT is_on_boundary; 3486 3487 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3488 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3489 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3490 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3491 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3492 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3493 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3494 if (flg_row) { 3495 graph->xadj = xadj; 3496 graph->adjncy = adjncy; 3497 } 3498 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3499 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3500 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3501 3502 if (pcbddc->dbg_flag) { 3503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3504 for (i=0;i<graph->ncc;i++) { 3505 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3506 } 3507 } 3508 3509 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3510 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3511 for (i=0;i<pcis->n_B;i++) { 3512 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3513 } 3514 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3515 3516 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3517 sum = 0; 3518 for (i=0;i<graph->ncc;i++) { 3519 PetscInt sizecc = 0; 3520 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3521 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3522 sizecc++; 3523 } 3524 } 3525 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3526 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3527 cxadj[graph->queue[j]] = sizecc; 3528 } 3529 } 3530 sum += sizecc*sizecc; 3531 } 3532 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3533 sum = 0; 3534 for (i=0;i<pcis->n;i++) { 3535 PetscInt temp = cxadj[i]; 3536 cxadj[i] = sum; 3537 sum += temp; 3538 } 3539 cxadj[pcis->n] = sum; 3540 for (i=0;i<graph->ncc;i++) { 3541 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3542 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3543 PetscInt k,sizecc = 0; 3544 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3545 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3546 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3547 sizecc++; 3548 } 3549 } 3550 } 3551 } 3552 } 3553 if (sum) { 3554 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3555 } else { 3556 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3557 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3558 } 3559 graph->xadj = 0; 3560 graph->adjncy = 0; 3561 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3562 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3563 } 3564 } 3565 if (pcbddc->dbg_flag) { 3566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3567 } 3568 3569 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3570 vertex_size = 1; 3571 if (pcbddc->user_provided_isfordofs) { 3572 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3573 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3574 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3575 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3576 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3577 } 3578 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3579 pcbddc->n_ISForDofs = 0; 3580 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3581 } 3582 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3583 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3584 } else { 3585 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3586 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3587 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3588 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3589 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3590 } 3591 } 3592 } 3593 3594 /* Setup of Graph */ 3595 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3596 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3597 } 3598 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3599 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3600 } 3601 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3602 3603 /* Graph's connected components analysis */ 3604 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3605 3606 /* print some info to stdout */ 3607 if (pcbddc->dbg_flag) { 3608 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3609 } 3610 3611 /* mark topography has done */ 3612 pcbddc->recompute_topography = PETSC_FALSE; 3613 PetscFunctionReturn(0); 3614 } 3615 3616 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3617 #undef __FUNCT__ 3618 #define __FUNCT__ "PCBDDCSubsetNumbering" 3619 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3620 { 3621 PetscSF sf; 3622 PetscLayout map; 3623 const PetscInt *idxs; 3624 PetscInt *leaf_data,*root_data,*gidxs; 3625 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3626 PetscInt n_n,nlocals,start,first_index; 3627 PetscMPIInt commsize; 3628 PetscBool first_found; 3629 PetscErrorCode ierr; 3630 3631 PetscFunctionBegin; 3632 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3633 if (subset_mult) { 3634 PetscCheckSameComm(subset,1,subset_mult,2); 3635 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3636 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3637 } 3638 /* create workspace layout for computing global indices of subset */ 3639 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3640 lbounds[0] = lbounds[1] = 0; 3641 for (i=0;i<n;i++) { 3642 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3643 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3644 } 3645 lbounds[0] = -lbounds[0]; 3646 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3647 gbounds[0] = -gbounds[0]; 3648 N = gbounds[1] - gbounds[0] + 1; 3649 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3650 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3651 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3652 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3653 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3654 3655 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3656 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3657 if (subset_mult) { 3658 const PetscInt* idxs_mult; 3659 3660 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3661 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3662 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3663 } else { 3664 for (i=0;i<n;i++) leaf_data[i] = 1; 3665 } 3666 /* local size of new subset */ 3667 n_n = 0; 3668 for (i=0;i<n;i++) n_n += leaf_data[i]; 3669 3670 /* global indexes in layout */ 3671 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3672 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3673 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3674 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3675 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3676 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3677 3678 /* reduce from leaves to roots */ 3679 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3680 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3681 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3682 3683 /* count indexes in local part of layout */ 3684 nlocals = 0; 3685 first_index = -1; 3686 first_found = PETSC_FALSE; 3687 for (i=0;i<Nl;i++) { 3688 if (!first_found && root_data[i]) { 3689 first_found = PETSC_TRUE; 3690 first_index = i; 3691 } 3692 nlocals += root_data[i]; 3693 } 3694 3695 /* cumulative of number of indexes and size of subset without holes */ 3696 #if defined(PETSC_HAVE_MPI_EXSCAN) 3697 start = 0; 3698 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3699 #else 3700 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3701 start = start-nlocals; 3702 #endif 3703 3704 if (N_n) { /* compute total size of new subset if requested */ 3705 *N_n = start + nlocals; 3706 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3707 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3708 } 3709 3710 /* adapt root data with cumulative */ 3711 if (first_found) { 3712 PetscInt old_index; 3713 3714 root_data[first_index] += start; 3715 old_index = first_index; 3716 for (i=first_index+1;i<Nl;i++) { 3717 if (root_data[i]) { 3718 root_data[i] += root_data[old_index]; 3719 old_index = i; 3720 } 3721 } 3722 } 3723 3724 /* from roots to leaves */ 3725 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3726 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3727 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3728 3729 /* create new IS with global indexes without holes */ 3730 if (subset_mult) { 3731 const PetscInt* idxs_mult; 3732 PetscInt cum; 3733 3734 cum = 0; 3735 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3736 for (i=0;i<n;i++) { 3737 PetscInt j; 3738 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3739 } 3740 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3741 } else { 3742 for (i=0;i<n;i++) { 3743 gidxs[i] = leaf_data[i]-1; 3744 } 3745 } 3746 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3747 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3748 PetscFunctionReturn(0); 3749 } 3750 3751 #undef __FUNCT__ 3752 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3753 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3754 { 3755 PetscInt i,j; 3756 PetscScalar *alphas; 3757 PetscErrorCode ierr; 3758 3759 PetscFunctionBegin; 3760 /* this implements stabilized Gram-Schmidt */ 3761 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3762 for (i=0;i<n;i++) { 3763 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3764 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3765 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3766 } 3767 ierr = PetscFree(alphas);CHKERRQ(ierr); 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "MatISGetSubassemblingPattern" 3773 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3774 { 3775 IS ranks_send_to; 3776 PetscInt n_neighs,*neighs,*n_shared,**shared; 3777 PetscMPIInt size,rank,color; 3778 PetscInt *xadj,*adjncy; 3779 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3780 PetscInt i,local_size,threshold=0; 3781 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3782 PetscSubcomm subcomm; 3783 PetscErrorCode ierr; 3784 3785 PetscFunctionBegin; 3786 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3787 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3788 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3789 3790 /* Get info on mapping */ 3791 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3792 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3793 3794 /* build local CSR graph of subdomains' connectivity */ 3795 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3796 xadj[0] = 0; 3797 xadj[1] = PetscMax(n_neighs-1,0); 3798 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3799 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3800 3801 if (threshold) { 3802 PetscInt xadj_count = 0; 3803 for (i=1;i<n_neighs;i++) { 3804 if (n_shared[i] > threshold) { 3805 adjncy[xadj_count] = neighs[i]; 3806 adjncy_wgt[xadj_count] = n_shared[i]; 3807 xadj_count++; 3808 } 3809 } 3810 xadj[1] = xadj_count; 3811 } else { 3812 if (xadj[1]) { 3813 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3814 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3815 } 3816 } 3817 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3818 if (use_square) { 3819 for (i=0;i<xadj[1];i++) { 3820 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3821 } 3822 } 3823 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3824 3825 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3826 3827 /* 3828 Restrict work on active processes only. 3829 */ 3830 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3831 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3832 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3833 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3834 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3835 if (color) { 3836 ierr = PetscFree(xadj);CHKERRQ(ierr); 3837 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3838 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3839 } else { 3840 Mat subdomain_adj; 3841 IS new_ranks,new_ranks_contig; 3842 MatPartitioning partitioner; 3843 PetscInt prank,rstart=0,rend=0; 3844 PetscInt *is_indices,*oldranks; 3845 PetscBool aggregate; 3846 3847 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3848 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3849 prank = rank; 3850 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3851 /* 3852 for (i=0;i<size;i++) { 3853 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3854 } 3855 */ 3856 for (i=0;i<xadj[1];i++) { 3857 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3858 } 3859 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3860 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3861 if (aggregate) { 3862 PetscInt lrows,row,ncols,*cols; 3863 PetscMPIInt nrank; 3864 PetscScalar *vals; 3865 3866 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3867 lrows = 0; 3868 if (nrank<redprocs) { 3869 lrows = size/redprocs; 3870 if (nrank<size%redprocs) lrows++; 3871 } 3872 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3873 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3874 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3875 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3876 row = nrank; 3877 ncols = xadj[1]-xadj[0]; 3878 cols = adjncy; 3879 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3880 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3881 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3882 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3883 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3884 ierr = PetscFree(xadj);CHKERRQ(ierr); 3885 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3886 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3887 ierr = PetscFree(vals);CHKERRQ(ierr); 3888 } else { 3889 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3890 } 3891 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3892 3893 /* Partition */ 3894 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3895 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3896 if (use_vwgt) { 3897 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3898 v_wgt[0] = local_size; 3899 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3900 } 3901 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3902 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3903 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3904 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3905 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3906 3907 /* renumber new_ranks to avoid "holes" in new set of processors */ 3908 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3909 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3910 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3911 if (!redprocs) { 3912 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3913 } else { 3914 PetscInt idxs[1]; 3915 PetscMPIInt tag; 3916 MPI_Request *reqs; 3917 3918 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3919 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3920 for (i=rstart;i<rend;i++) { 3921 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3922 } 3923 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3924 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3925 ierr = PetscFree(reqs);CHKERRQ(ierr); 3926 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3927 } 3928 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3929 /* clean up */ 3930 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3931 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3932 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3933 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3934 } 3935 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3936 3937 /* assemble parallel IS for sends */ 3938 i = 1; 3939 if (color) i=0; 3940 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3941 /* get back IS */ 3942 *is_sends = ranks_send_to; 3943 PetscFunctionReturn(0); 3944 } 3945 3946 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "MatISSubassemble" 3950 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3951 { 3952 Mat local_mat; 3953 IS is_sends_internal; 3954 PetscInt rows,cols,new_local_rows; 3955 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3956 PetscBool ismatis,isdense,newisdense,destroy_mat; 3957 ISLocalToGlobalMapping l2gmap; 3958 PetscInt* l2gmap_indices; 3959 const PetscInt* is_indices; 3960 MatType new_local_type; 3961 /* buffers */ 3962 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3963 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3964 PetscInt *recv_buffer_idxs_local; 3965 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3966 /* MPI */ 3967 MPI_Comm comm,comm_n; 3968 PetscSubcomm subcomm; 3969 PetscMPIInt n_sends,n_recvs,commsize; 3970 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3971 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3972 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3973 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3974 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3975 PetscErrorCode ierr; 3976 3977 PetscFunctionBegin; 3978 /* TODO: add missing checks */ 3979 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3980 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3981 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3982 PetscValidLogicalCollectiveInt(mat,nis,7); 3983 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3984 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3985 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3986 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3987 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3988 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3989 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3990 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3991 PetscInt mrows,mcols,mnrows,mncols; 3992 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3993 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3994 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3995 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3996 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3997 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3998 } 3999 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4000 PetscValidLogicalCollectiveInt(mat,bs,0); 4001 /* prepare IS for sending if not provided */ 4002 if (!is_sends) { 4003 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4004 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4005 } else { 4006 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4007 is_sends_internal = is_sends; 4008 } 4009 4010 /* get comm */ 4011 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4012 4013 /* compute number of sends */ 4014 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4015 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4016 4017 /* compute number of receives */ 4018 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4019 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4020 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4021 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4022 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4023 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4024 ierr = PetscFree(iflags);CHKERRQ(ierr); 4025 4026 /* restrict comm if requested */ 4027 subcomm = 0; 4028 destroy_mat = PETSC_FALSE; 4029 if (restrict_comm) { 4030 PetscMPIInt color,subcommsize; 4031 4032 color = 0; 4033 if (restrict_full) { 4034 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4035 } else { 4036 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4037 } 4038 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4039 subcommsize = commsize - subcommsize; 4040 /* check if reuse has been requested */ 4041 if (reuse == MAT_REUSE_MATRIX) { 4042 if (*mat_n) { 4043 PetscMPIInt subcommsize2; 4044 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4045 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4046 comm_n = PetscObjectComm((PetscObject)*mat_n); 4047 } else { 4048 comm_n = PETSC_COMM_SELF; 4049 } 4050 } else { /* MAT_INITIAL_MATRIX */ 4051 PetscMPIInt rank; 4052 4053 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4054 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4055 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4056 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4057 comm_n = PetscSubcommChild(subcomm); 4058 } 4059 /* flag to destroy *mat_n if not significative */ 4060 if (color) destroy_mat = PETSC_TRUE; 4061 } else { 4062 comm_n = comm; 4063 } 4064 4065 /* prepare send/receive buffers */ 4066 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4067 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4068 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4069 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4070 if (nis) { 4071 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4072 } 4073 4074 /* Get data from local matrices */ 4075 if (!isdense) { 4076 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4077 /* TODO: See below some guidelines on how to prepare the local buffers */ 4078 /* 4079 send_buffer_vals should contain the raw values of the local matrix 4080 send_buffer_idxs should contain: 4081 - MatType_PRIVATE type 4082 - PetscInt size_of_l2gmap 4083 - PetscInt global_row_indices[size_of_l2gmap] 4084 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4085 */ 4086 } else { 4087 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4088 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4089 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4090 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4091 send_buffer_idxs[1] = i; 4092 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4093 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4094 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4095 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4096 for (i=0;i<n_sends;i++) { 4097 ilengths_vals[is_indices[i]] = len*len; 4098 ilengths_idxs[is_indices[i]] = len+2; 4099 } 4100 } 4101 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4102 /* additional is (if any) */ 4103 if (nis) { 4104 PetscMPIInt psum; 4105 PetscInt j; 4106 for (j=0,psum=0;j<nis;j++) { 4107 PetscInt plen; 4108 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4109 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4110 psum += len+1; /* indices + lenght */ 4111 } 4112 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4113 for (j=0,psum=0;j<nis;j++) { 4114 PetscInt plen; 4115 const PetscInt *is_array_idxs; 4116 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4117 send_buffer_idxs_is[psum] = plen; 4118 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4119 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4120 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4121 psum += plen+1; /* indices + lenght */ 4122 } 4123 for (i=0;i<n_sends;i++) { 4124 ilengths_idxs_is[is_indices[i]] = psum; 4125 } 4126 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4127 } 4128 4129 buf_size_idxs = 0; 4130 buf_size_vals = 0; 4131 buf_size_idxs_is = 0; 4132 for (i=0;i<n_recvs;i++) { 4133 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4134 buf_size_vals += (PetscInt)olengths_vals[i]; 4135 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4136 } 4137 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4138 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4139 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4140 4141 /* get new tags for clean communications */ 4142 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4143 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4144 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4145 4146 /* allocate for requests */ 4147 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4148 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4149 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4150 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4151 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4152 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4153 4154 /* communications */ 4155 ptr_idxs = recv_buffer_idxs; 4156 ptr_vals = recv_buffer_vals; 4157 ptr_idxs_is = recv_buffer_idxs_is; 4158 for (i=0;i<n_recvs;i++) { 4159 source_dest = onodes[i]; 4160 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4161 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4162 ptr_idxs += olengths_idxs[i]; 4163 ptr_vals += olengths_vals[i]; 4164 if (nis) { 4165 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4166 ptr_idxs_is += olengths_idxs_is[i]; 4167 } 4168 } 4169 for (i=0;i<n_sends;i++) { 4170 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4171 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4172 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4173 if (nis) { 4174 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4175 } 4176 } 4177 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4178 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4179 4180 /* assemble new l2g map */ 4181 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4182 ptr_idxs = recv_buffer_idxs; 4183 new_local_rows = 0; 4184 for (i=0;i<n_recvs;i++) { 4185 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4186 ptr_idxs += olengths_idxs[i]; 4187 } 4188 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4189 ptr_idxs = recv_buffer_idxs; 4190 new_local_rows = 0; 4191 for (i=0;i<n_recvs;i++) { 4192 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4193 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4194 ptr_idxs += olengths_idxs[i]; 4195 } 4196 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4197 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4198 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4199 4200 /* infer new local matrix type from received local matrices type */ 4201 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4202 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4203 if (n_recvs) { 4204 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4205 ptr_idxs = recv_buffer_idxs; 4206 for (i=0;i<n_recvs;i++) { 4207 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4208 new_local_type_private = MATAIJ_PRIVATE; 4209 break; 4210 } 4211 ptr_idxs += olengths_idxs[i]; 4212 } 4213 switch (new_local_type_private) { 4214 case MATDENSE_PRIVATE: 4215 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4216 new_local_type = MATSEQAIJ; 4217 bs = 1; 4218 } else { /* if I receive only 1 dense matrix */ 4219 new_local_type = MATSEQDENSE; 4220 bs = 1; 4221 } 4222 break; 4223 case MATAIJ_PRIVATE: 4224 new_local_type = MATSEQAIJ; 4225 bs = 1; 4226 break; 4227 case MATBAIJ_PRIVATE: 4228 new_local_type = MATSEQBAIJ; 4229 break; 4230 case MATSBAIJ_PRIVATE: 4231 new_local_type = MATSEQSBAIJ; 4232 break; 4233 default: 4234 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4235 break; 4236 } 4237 } else { /* by default, new_local_type is seqdense */ 4238 new_local_type = MATSEQDENSE; 4239 bs = 1; 4240 } 4241 4242 /* create MATIS object if needed */ 4243 if (reuse == MAT_INITIAL_MATRIX) { 4244 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4245 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4246 } else { 4247 /* it also destroys the local matrices */ 4248 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4249 } 4250 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4251 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4252 4253 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4254 4255 /* Global to local map of received indices */ 4256 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4257 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4258 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4259 4260 /* restore attributes -> type of incoming data and its size */ 4261 buf_size_idxs = 0; 4262 for (i=0;i<n_recvs;i++) { 4263 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4264 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4265 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4266 } 4267 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4268 4269 /* set preallocation */ 4270 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4271 if (!newisdense) { 4272 PetscInt *new_local_nnz=0; 4273 4274 ptr_vals = recv_buffer_vals; 4275 ptr_idxs = recv_buffer_idxs_local; 4276 if (n_recvs) { 4277 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4278 } 4279 for (i=0;i<n_recvs;i++) { 4280 PetscInt j; 4281 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4282 for (j=0;j<*(ptr_idxs+1);j++) { 4283 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4284 } 4285 } else { 4286 /* TODO */ 4287 } 4288 ptr_idxs += olengths_idxs[i]; 4289 } 4290 if (new_local_nnz) { 4291 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4292 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4293 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4294 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4295 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4296 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4297 } else { 4298 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4299 } 4300 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4301 } else { 4302 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4303 } 4304 4305 /* set values */ 4306 ptr_vals = recv_buffer_vals; 4307 ptr_idxs = recv_buffer_idxs_local; 4308 for (i=0;i<n_recvs;i++) { 4309 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4310 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4311 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4312 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4313 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4314 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4315 } else { 4316 /* TODO */ 4317 } 4318 ptr_idxs += olengths_idxs[i]; 4319 ptr_vals += olengths_vals[i]; 4320 } 4321 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4322 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4323 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4324 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4325 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4326 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4327 4328 #if 0 4329 if (!restrict_comm) { /* check */ 4330 Vec lvec,rvec; 4331 PetscReal infty_error; 4332 4333 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4334 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4335 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4336 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4337 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4338 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4339 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4340 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4341 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4342 } 4343 #endif 4344 4345 /* assemble new additional is (if any) */ 4346 if (nis) { 4347 PetscInt **temp_idxs,*count_is,j,psum; 4348 4349 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4350 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4351 ptr_idxs = recv_buffer_idxs_is; 4352 psum = 0; 4353 for (i=0;i<n_recvs;i++) { 4354 for (j=0;j<nis;j++) { 4355 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4356 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4357 psum += plen; 4358 ptr_idxs += plen+1; /* shift pointer to received data */ 4359 } 4360 } 4361 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4362 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4363 for (i=1;i<nis;i++) { 4364 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4365 } 4366 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4367 ptr_idxs = recv_buffer_idxs_is; 4368 for (i=0;i<n_recvs;i++) { 4369 for (j=0;j<nis;j++) { 4370 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4371 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4372 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4373 ptr_idxs += plen+1; /* shift pointer to received data */ 4374 } 4375 } 4376 for (i=0;i<nis;i++) { 4377 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4378 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4379 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4380 } 4381 ierr = PetscFree(count_is);CHKERRQ(ierr); 4382 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4383 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4384 } 4385 /* free workspace */ 4386 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4387 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4388 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4389 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4390 if (isdense) { 4391 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4392 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4393 } else { 4394 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4395 } 4396 if (nis) { 4397 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4398 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4399 } 4400 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4401 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4402 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4403 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4404 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4405 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4406 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4407 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4408 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4409 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4410 ierr = PetscFree(onodes);CHKERRQ(ierr); 4411 if (nis) { 4412 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4413 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4414 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4415 } 4416 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4417 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4418 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4419 for (i=0;i<nis;i++) { 4420 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4421 } 4422 *mat_n = NULL; 4423 } 4424 PetscFunctionReturn(0); 4425 } 4426 4427 /* temporary hack into ksp private data structure */ 4428 #include <petsc/private/kspimpl.h> 4429 4430 #undef __FUNCT__ 4431 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4432 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4433 { 4434 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4435 PC_IS *pcis = (PC_IS*)pc->data; 4436 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4437 MatNullSpace CoarseNullSpace=NULL; 4438 ISLocalToGlobalMapping coarse_islg; 4439 IS coarse_is,*isarray; 4440 PetscInt i,im_active=-1,active_procs=-1; 4441 PetscInt nis,nisdofs,nisneu; 4442 PC pc_temp; 4443 PCType coarse_pc_type; 4444 KSPType coarse_ksp_type; 4445 PetscBool multilevel_requested,multilevel_allowed; 4446 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4447 Mat t_coarse_mat_is; 4448 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4449 PetscMPIInt all_procs; 4450 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4451 PetscBool compute_vecs = PETSC_FALSE; 4452 PetscScalar *array; 4453 PetscErrorCode ierr; 4454 4455 PetscFunctionBegin; 4456 /* Assign global numbering to coarse dofs */ 4457 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4458 PetscInt ocoarse_size; 4459 compute_vecs = PETSC_TRUE; 4460 ocoarse_size = pcbddc->coarse_size; 4461 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4462 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4463 /* see if we can avoid some work */ 4464 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4465 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4466 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4467 PC pc; 4468 PetscBool isbddc; 4469 4470 /* temporary workaround since PCBDDC does not have a reset method so far */ 4471 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4472 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4473 if (isbddc) { 4474 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4475 } 4476 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4477 coarse_reuse = PETSC_FALSE; 4478 } else { /* we can safely reuse already computed coarse matrix */ 4479 coarse_reuse = PETSC_TRUE; 4480 } 4481 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4482 coarse_reuse = PETSC_FALSE; 4483 } 4484 /* reset any subassembling information */ 4485 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4486 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4487 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4488 coarse_reuse = PETSC_TRUE; 4489 } 4490 4491 /* count "active" (i.e. with positive local size) and "void" processes */ 4492 im_active = !!(pcis->n); 4493 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4494 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4495 void_procs = all_procs-active_procs; 4496 csin_type_simple = PETSC_TRUE; 4497 redist = PETSC_FALSE; 4498 if (pcbddc->current_level && void_procs) { 4499 csin_ml = PETSC_TRUE; 4500 ncoarse_ml = void_procs; 4501 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4502 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4503 csin_ds = PETSC_TRUE; 4504 ncoarse_ds = pcbddc->redistribute_coarse; 4505 redist = PETSC_TRUE; 4506 } else { 4507 csin_ds = PETSC_TRUE; 4508 ncoarse_ds = active_procs; 4509 redist = PETSC_TRUE; 4510 } 4511 } else { 4512 csin_ml = PETSC_FALSE; 4513 ncoarse_ml = all_procs; 4514 if (void_procs) { 4515 csin_ds = PETSC_TRUE; 4516 ncoarse_ds = void_procs; 4517 csin_type_simple = PETSC_FALSE; 4518 } else { 4519 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4520 csin_ds = PETSC_TRUE; 4521 ncoarse_ds = pcbddc->redistribute_coarse; 4522 redist = PETSC_TRUE; 4523 } else { 4524 csin_ds = PETSC_FALSE; 4525 ncoarse_ds = all_procs; 4526 } 4527 } 4528 } 4529 4530 /* 4531 test if we can go multilevel: three conditions must be satisfied: 4532 - we have not exceeded the number of levels requested 4533 - we can actually subassemble the active processes 4534 - we can find a suitable number of MPI processes where we can place the subassembled problem 4535 */ 4536 multilevel_allowed = PETSC_FALSE; 4537 multilevel_requested = PETSC_FALSE; 4538 if (pcbddc->current_level < pcbddc->max_levels) { 4539 multilevel_requested = PETSC_TRUE; 4540 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4541 multilevel_allowed = PETSC_FALSE; 4542 } else { 4543 multilevel_allowed = PETSC_TRUE; 4544 } 4545 } 4546 /* determine number of process partecipating to coarse solver */ 4547 if (multilevel_allowed) { 4548 ncoarse = ncoarse_ml; 4549 csin = csin_ml; 4550 redist = PETSC_FALSE; 4551 } else { 4552 ncoarse = ncoarse_ds; 4553 csin = csin_ds; 4554 } 4555 4556 /* creates temporary l2gmap and IS for coarse indexes */ 4557 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4558 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4559 4560 /* creates temporary MATIS object for coarse matrix */ 4561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4562 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4563 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4564 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4565 #if 0 4566 { 4567 PetscViewer viewer; 4568 char filename[256]; 4569 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4570 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4571 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4572 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4573 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4574 } 4575 #endif 4576 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4577 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4578 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4579 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4580 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4581 4582 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4583 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4584 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4585 const PetscInt *idxs; 4586 ISLocalToGlobalMapping tmap; 4587 4588 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4589 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4590 /* allocate space for temporary storage */ 4591 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4592 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4593 /* allocate for IS array */ 4594 nisdofs = pcbddc->n_ISForDofsLocal; 4595 nisneu = !!pcbddc->NeumannBoundariesLocal; 4596 nis = nisdofs + nisneu; 4597 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4598 /* dofs splitting */ 4599 for (i=0;i<nisdofs;i++) { 4600 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4601 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4602 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4603 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4604 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4605 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4606 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4607 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4608 } 4609 /* neumann boundaries */ 4610 if (pcbddc->NeumannBoundariesLocal) { 4611 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4612 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4613 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4614 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4615 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4616 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4617 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4618 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4619 } 4620 /* free memory */ 4621 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4622 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4623 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4624 } else { 4625 nis = 0; 4626 nisdofs = 0; 4627 nisneu = 0; 4628 isarray = NULL; 4629 } 4630 /* destroy no longer needed map */ 4631 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4632 4633 /* restrict on coarse candidates (if needed) */ 4634 coarse_mat_is = NULL; 4635 if (csin) { 4636 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4637 if (redist) { 4638 PetscMPIInt rank; 4639 PetscInt spc,n_spc_p1,dest[1],destsize; 4640 4641 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4642 spc = active_procs/ncoarse; 4643 n_spc_p1 = active_procs%ncoarse; 4644 if (im_active) { 4645 destsize = 1; 4646 if (rank > n_spc_p1*(spc+1)-1) { 4647 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4648 } else { 4649 dest[0] = rank/(spc+1); 4650 } 4651 } else { 4652 destsize = 0; 4653 } 4654 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4655 } else if (csin_type_simple) { 4656 PetscMPIInt rank; 4657 PetscInt issize,isidx; 4658 4659 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4660 if (im_active) { 4661 issize = 1; 4662 isidx = (PetscInt)rank; 4663 } else { 4664 issize = 0; 4665 isidx = -1; 4666 } 4667 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4668 } else { /* get a suitable subassembling pattern from MATIS code */ 4669 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4670 } 4671 4672 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4673 if (!redist || ncoarse <= void_procs) { 4674 PetscInt ncoarse_cand,tissize,*nisindices; 4675 PetscInt *coarse_candidates; 4676 const PetscInt* tisindices; 4677 4678 /* get coarse candidates' ranks in pc communicator */ 4679 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4680 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4681 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4682 if (!coarse_candidates[i]) { 4683 coarse_candidates[ncoarse_cand++]=i; 4684 } 4685 } 4686 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4687 4688 4689 if (pcbddc->dbg_flag) { 4690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4691 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4692 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4694 for (i=0;i<ncoarse_cand;i++) { 4695 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4696 } 4697 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4698 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4699 } 4700 /* shift the pattern on coarse candidates */ 4701 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4702 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4703 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4704 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4705 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4706 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4707 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4708 } 4709 if (pcbddc->dbg_flag) { 4710 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4711 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4712 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4713 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4714 } 4715 } 4716 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4717 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4718 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4719 } else { /* this is the last level, so use just receiving processes in subcomm */ 4720 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4721 } 4722 } else { 4723 if (pcbddc->dbg_flag) { 4724 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4725 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4726 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4727 } 4728 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4729 coarse_mat_is = t_coarse_mat_is; 4730 } 4731 4732 /* create local to global scatters for coarse problem */ 4733 if (compute_vecs) { 4734 PetscInt lrows; 4735 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4736 if (coarse_mat_is) { 4737 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4738 } else { 4739 lrows = 0; 4740 } 4741 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4742 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4743 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4744 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4745 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4746 } 4747 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4748 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4749 4750 /* set defaults for coarse KSP and PC */ 4751 if (multilevel_allowed) { 4752 coarse_ksp_type = KSPRICHARDSON; 4753 coarse_pc_type = PCBDDC; 4754 } else { 4755 coarse_ksp_type = KSPPREONLY; 4756 coarse_pc_type = PCREDUNDANT; 4757 } 4758 4759 /* print some info if requested */ 4760 if (pcbddc->dbg_flag) { 4761 if (!multilevel_allowed) { 4762 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4763 if (multilevel_requested) { 4764 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4765 } else if (pcbddc->max_levels) { 4766 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4767 } 4768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4769 } 4770 } 4771 4772 /* create the coarse KSP object only once with defaults */ 4773 if (coarse_mat_is) { 4774 MatReuse coarse_mat_reuse; 4775 PetscViewer dbg_viewer = NULL; 4776 if (pcbddc->dbg_flag) { 4777 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4778 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4779 } 4780 if (!pcbddc->coarse_ksp) { 4781 char prefix[256],str_level[16]; 4782 size_t len; 4783 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4784 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4785 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4786 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4787 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4788 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4789 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4790 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4791 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4792 /* prefix */ 4793 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4794 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4795 if (!pcbddc->current_level) { 4796 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4797 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4798 } else { 4799 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4800 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4801 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4802 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4803 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4804 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4805 } 4806 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4807 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4808 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4809 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4810 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4811 /* allow user customization */ 4812 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4813 } 4814 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4815 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4816 if (nisdofs) { 4817 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4818 for (i=0;i<nisdofs;i++) { 4819 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4820 } 4821 } 4822 if (nisneu) { 4823 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4824 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4825 } 4826 4827 /* get some info after set from options */ 4828 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4829 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4830 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4831 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4832 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4833 isbddc = PETSC_FALSE; 4834 } 4835 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4836 if (isredundant) { 4837 KSP inner_ksp; 4838 PC inner_pc; 4839 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4840 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4841 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4842 } 4843 4844 /* assemble coarse matrix */ 4845 if (coarse_reuse) { 4846 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4847 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4848 coarse_mat_reuse = MAT_REUSE_MATRIX; 4849 } else { 4850 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4851 } 4852 if (isbddc || isnn) { 4853 if (pcbddc->coarsening_ratio > 1) { 4854 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4855 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4856 if (pcbddc->dbg_flag) { 4857 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4858 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4859 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4860 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4861 } 4862 } 4863 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4864 } else { 4865 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4866 coarse_mat = coarse_mat_is; 4867 } 4868 } else { 4869 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4870 } 4871 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4872 4873 /* propagate symmetry info of coarse matrix */ 4874 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4875 if (pc->pmat->symmetric_set) { 4876 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4877 } 4878 if (pc->pmat->hermitian_set) { 4879 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4880 } 4881 if (pc->pmat->spd_set) { 4882 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4883 } 4884 /* set operators */ 4885 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4886 if (pcbddc->dbg_flag) { 4887 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4888 } 4889 } else { /* processes non partecipating to coarse solver (if any) */ 4890 coarse_mat = 0; 4891 } 4892 ierr = PetscFree(isarray);CHKERRQ(ierr); 4893 #if 0 4894 { 4895 PetscViewer viewer; 4896 char filename[256]; 4897 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4898 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4899 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4900 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4901 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4902 } 4903 #endif 4904 4905 /* Compute coarse null space (special handling by BDDC only) */ 4906 #if 0 4907 if (pcbddc->NullSpace) { 4908 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4909 } 4910 #endif 4911 /* hack */ 4912 if (pcbddc->coarse_ksp) { 4913 Vec crhs,csol; 4914 4915 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4916 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4917 if (!csol) { 4918 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4919 } 4920 if (!crhs) { 4921 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4922 } 4923 } 4924 4925 /* compute null space for coarse solver if the benign trick has been requested */ 4926 if (pcbddc->benign_null) { 4927 4928 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 4929 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-1,1.0,INSERT_VALUES);CHKERRQ(ierr); 4930 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 4931 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 4932 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4933 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4934 if (coarse_mat) { 4935 Vec nullv; 4936 PetscScalar *array,*array2; 4937 PetscInt nl; 4938 4939 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 4940 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 4941 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 4942 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 4943 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 4944 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 4945 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 4946 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 4947 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 4948 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 4949 } 4950 } 4951 4952 if (pcbddc->coarse_ksp) { 4953 PetscBool ispreonly; 4954 4955 if (CoarseNullSpace) { 4956 PetscBool isnull; 4957 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 4958 if (isnull) { 4959 if (isbddc) { 4960 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4961 } else { 4962 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4963 } 4964 } else { 4965 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4966 } 4967 } 4968 /* setup coarse ksp */ 4969 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4970 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4971 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4972 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4973 KSP check_ksp; 4974 KSPType check_ksp_type; 4975 PC check_pc; 4976 Vec check_vec,coarse_vec; 4977 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4978 PetscInt its; 4979 PetscBool compute_eigs; 4980 PetscReal *eigs_r,*eigs_c; 4981 PetscInt neigs; 4982 const char *prefix; 4983 4984 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4985 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4986 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4987 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4988 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4989 if (ispreonly) { 4990 check_ksp_type = KSPPREONLY; 4991 compute_eigs = PETSC_FALSE; 4992 } else { 4993 check_ksp_type = KSPGMRES; 4994 compute_eigs = PETSC_TRUE; 4995 } 4996 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4997 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4998 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4999 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5000 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5001 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5002 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5003 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5004 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5005 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5006 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5007 /* create random vec */ 5008 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5009 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5010 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5011 if (CoarseNullSpace) { 5012 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5013 } 5014 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5015 /* solve coarse problem */ 5016 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5017 if (CoarseNullSpace) { 5018 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5019 } 5020 /* set eigenvalue estimation if preonly has not been requested */ 5021 if (compute_eigs) { 5022 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5023 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5024 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5025 lambda_max = eigs_r[neigs-1]; 5026 lambda_min = eigs_r[0]; 5027 if (pcbddc->use_coarse_estimates) { 5028 if (lambda_max>lambda_min) { 5029 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5030 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5031 } 5032 } 5033 } 5034 5035 /* check coarse problem residual error */ 5036 if (pcbddc->dbg_flag) { 5037 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5038 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5039 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5040 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5041 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5042 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5043 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5044 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5045 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5046 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5047 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5048 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5049 if (CoarseNullSpace) { 5050 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5051 } 5052 if (compute_eigs) { 5053 PetscReal lambda_max_s,lambda_min_s; 5054 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5055 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5056 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5057 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5058 for (i=0;i<neigs;i++) { 5059 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5060 } 5061 } 5062 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5063 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5064 } 5065 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5066 if (compute_eigs) { 5067 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5068 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5069 } 5070 } 5071 } 5072 /* print additional info */ 5073 if (pcbddc->dbg_flag) { 5074 /* waits until all processes reaches this point */ 5075 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5078 } 5079 5080 /* free memory */ 5081 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5082 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5083 PetscFunctionReturn(0); 5084 } 5085 5086 #undef __FUNCT__ 5087 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5088 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5089 { 5090 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5091 PC_IS* pcis = (PC_IS*)pc->data; 5092 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5093 IS subset,subset_mult,subset_n; 5094 PetscInt local_size,coarse_size=0; 5095 PetscInt *local_primal_indices=NULL; 5096 const PetscInt *t_local_primal_indices; 5097 PetscErrorCode ierr; 5098 5099 PetscFunctionBegin; 5100 /* Compute global number of coarse dofs */ 5101 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5102 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5103 } 5104 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5105 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5106 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5107 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5108 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5109 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5110 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5111 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5112 if (local_size != pcbddc->local_primal_size) { 5113 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5114 } 5115 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5116 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5117 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5118 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5119 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5120 5121 /* check numbering */ 5122 if (pcbddc->dbg_flag) { 5123 PetscScalar coarsesum,*array,*array2; 5124 PetscInt i; 5125 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5126 5127 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5128 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5129 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5130 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5131 /* counter */ 5132 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5133 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5134 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5135 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5136 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5137 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5138 5139 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5140 for (i=0;i<pcbddc->local_primal_size;i++) { 5141 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5142 } 5143 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5144 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5145 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5146 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5147 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5148 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5149 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5150 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5151 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5152 for (i=0;i<pcis->n;i++) { 5153 if (array[i] != 0.0 && array[i] != array2[i]) { 5154 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5155 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5156 set_error = PETSC_TRUE; 5157 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5158 } 5159 } 5160 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5161 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5163 for (i=0;i<pcis->n;i++) { 5164 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5165 } 5166 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5167 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5168 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5169 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5170 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5172 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5173 PetscInt *gidxs; 5174 5175 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5176 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5177 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5178 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5180 for (i=0;i<pcbddc->local_primal_size;i++) { 5181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5182 } 5183 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5184 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5185 } 5186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5187 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5188 } 5189 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5190 /* get back data */ 5191 *coarse_size_n = coarse_size; 5192 *local_primal_indices_n = local_primal_indices; 5193 PetscFunctionReturn(0); 5194 } 5195 5196 #undef __FUNCT__ 5197 #define __FUNCT__ "PCBDDCGlobalToLocal" 5198 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5199 { 5200 IS localis_t; 5201 PetscInt i,lsize,*idxs,n; 5202 PetscScalar *vals; 5203 PetscErrorCode ierr; 5204 5205 PetscFunctionBegin; 5206 /* get indices in local ordering exploiting local to global map */ 5207 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5208 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5209 for (i=0;i<lsize;i++) vals[i] = 1.0; 5210 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5211 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5212 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5213 if (idxs) { /* multilevel guard */ 5214 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5215 } 5216 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5217 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5218 ierr = PetscFree(vals);CHKERRQ(ierr); 5219 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5220 /* now compute set in local ordering */ 5221 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5222 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5223 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5224 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5225 for (i=0,lsize=0;i<n;i++) { 5226 if (PetscRealPart(vals[i]) > 0.5) { 5227 lsize++; 5228 } 5229 } 5230 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5231 for (i=0,lsize=0;i<n;i++) { 5232 if (PetscRealPart(vals[i]) > 0.5) { 5233 idxs[lsize++] = i; 5234 } 5235 } 5236 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5237 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5238 *localis = localis_t; 5239 PetscFunctionReturn(0); 5240 } 5241 5242 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5243 #undef __FUNCT__ 5244 #define __FUNCT__ "PCBDDCMatMult_Private" 5245 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5246 { 5247 PCBDDCChange_ctx change_ctx; 5248 PetscErrorCode ierr; 5249 5250 PetscFunctionBegin; 5251 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5252 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5253 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5254 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5255 PetscFunctionReturn(0); 5256 } 5257 5258 #undef __FUNCT__ 5259 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5260 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5261 { 5262 PCBDDCChange_ctx change_ctx; 5263 PetscErrorCode ierr; 5264 5265 PetscFunctionBegin; 5266 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5267 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5268 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5269 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5270 PetscFunctionReturn(0); 5271 } 5272 5273 #undef __FUNCT__ 5274 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5275 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5276 { 5277 PC_IS *pcis=(PC_IS*)pc->data; 5278 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5279 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5280 Mat S_j; 5281 PetscInt *used_xadj,*used_adjncy; 5282 PetscBool free_used_adj; 5283 PetscErrorCode ierr; 5284 5285 PetscFunctionBegin; 5286 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5287 free_used_adj = PETSC_FALSE; 5288 if (pcbddc->sub_schurs_layers == -1) { 5289 used_xadj = NULL; 5290 used_adjncy = NULL; 5291 } else { 5292 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5293 used_xadj = pcbddc->mat_graph->xadj; 5294 used_adjncy = pcbddc->mat_graph->adjncy; 5295 } else if (pcbddc->computed_rowadj) { 5296 used_xadj = pcbddc->mat_graph->xadj; 5297 used_adjncy = pcbddc->mat_graph->adjncy; 5298 } else { 5299 PetscBool flg_row=PETSC_FALSE; 5300 const PetscInt *xadj,*adjncy; 5301 PetscInt nvtxs; 5302 5303 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5304 if (flg_row) { 5305 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5306 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5307 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5308 free_used_adj = PETSC_TRUE; 5309 } else { 5310 pcbddc->sub_schurs_layers = -1; 5311 used_xadj = NULL; 5312 used_adjncy = NULL; 5313 } 5314 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5315 } 5316 } 5317 5318 /* setup sub_schurs data */ 5319 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5320 if (!sub_schurs->use_mumps) { 5321 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5322 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5323 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5324 } else { 5325 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5326 PetscBool isseqaij; 5327 if (!pcbddc->use_vertices && reuse_solvers) { 5328 PetscInt n_vertices; 5329 5330 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5331 reuse_solvers = (PetscBool)!n_vertices; 5332 } 5333 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5334 if (!isseqaij) { 5335 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5336 if (matis->A == pcbddc->local_mat) { 5337 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5338 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5339 } else { 5340 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5341 } 5342 } 5343 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5344 } 5345 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5346 5347 /* free adjacency */ 5348 if (free_used_adj) { 5349 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5350 } 5351 PetscFunctionReturn(0); 5352 } 5353 5354 #undef __FUNCT__ 5355 #define __FUNCT__ "PCBDDCInitSubSchurs" 5356 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5357 { 5358 PC_IS *pcis=(PC_IS*)pc->data; 5359 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5360 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5361 PCBDDCGraph graph; 5362 PetscErrorCode ierr; 5363 5364 PetscFunctionBegin; 5365 /* attach interface graph for determining subsets */ 5366 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5367 IS verticesIS,verticescomm; 5368 PetscInt vsize,*idxs; 5369 5370 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5371 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5372 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5374 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5375 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5376 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5377 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5378 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5379 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5380 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5381 /* 5382 if (pcbddc->dbg_flag) { 5383 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5384 } 5385 */ 5386 } else { 5387 graph = pcbddc->mat_graph; 5388 } 5389 5390 /* sub_schurs init */ 5391 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5392 5393 /* free graph struct */ 5394 if (pcbddc->sub_schurs_rebuild) { 5395 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5396 } 5397 PetscFunctionReturn(0); 5398 } 5399 5400 #undef __FUNCT__ 5401 #define __FUNCT__ "PCBDDCCheckOperator" 5402 PetscErrorCode PCBDDCCheckOperator(PC pc) 5403 { 5404 PC_IS *pcis=(PC_IS*)pc->data; 5405 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5406 PetscErrorCode ierr; 5407 5408 PetscFunctionBegin; 5409 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5410 IS zerodiag = NULL; 5411 Mat S_j,B0=NULL,B0_B=NULL; 5412 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5413 PetscScalar p0_check,*array,*array2; 5414 PetscReal norm; 5415 PetscInt i; 5416 5417 /* B0 and B0_B */ 5418 if (zerodiag) { 5419 IS dummy; 5420 PetscInt ii[2]; 5421 5422 ii[0] = 0; 5423 ii[1] = pcbddc->B0_ncol; 5424 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5425 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 5426 ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5427 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5428 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5429 } 5430 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5431 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5432 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5433 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5434 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5435 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5436 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5437 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5438 /* S_j */ 5439 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5440 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5441 5442 /* mimic vector in \widetilde{W}_\Gamma */ 5443 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5444 /* continuous in primal space */ 5445 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5446 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5447 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5448 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5449 if (zerodiag) { 5450 p0_check = array[pcbddc->local_primal_size-1]; 5451 } else { 5452 p0_check = 0; 5453 } 5454 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5455 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5456 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5457 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5458 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5459 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5460 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5461 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5462 5463 /* assemble rhs for coarse problem */ 5464 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5465 /* local with Schur */ 5466 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5467 if (zerodiag) { 5468 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5469 array[0] = p0_check; 5470 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5471 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5472 } 5473 /* sum on primal nodes the local contributions */ 5474 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5475 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5476 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5477 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5478 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5479 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5480 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5481 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5482 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5483 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5484 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5485 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5486 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5487 /* scale primal nodes (BDDC sums contibutions) */ 5488 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5489 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5490 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5491 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5492 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5493 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5494 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5495 /* global: \widetilde{B0}_B w_\Gamma */ 5496 if (zerodiag) { 5497 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5498 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5499 pcbddc->benign_p0 = array[0]; 5500 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5501 } else { 5502 pcbddc->benign_p0 = 0.; 5503 } 5504 /* BDDC */ 5505 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5506 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5507 5508 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5509 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5510 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5511 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5512 if (pcbddc->benign_p0_lidx >= 0) { 5513 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check)); 5514 } 5515 5516 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5517 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5518 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5519 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5520 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5521 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5522 } 5523 PetscFunctionReturn(0); 5524 } 5525