1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignCheck" 10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PC_IS* pcis = (PC_IS*)(pc->data); 14 IS dirIS = NULL; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 19 if (zerodiag) { 20 Mat A; 21 Vec vec3_N; 22 PetscScalar *vals; 23 const PetscInt *idxs; 24 PetscInt i,nz; 25 26 /* p0 */ 27 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 28 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 29 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 30 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 31 for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */ 32 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 33 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 34 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 35 /* v_I */ 36 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 37 for (i=0;i<nz;i++) vals[i] = 0.; 38 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 39 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 40 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 41 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 42 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 43 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 44 if (dirIS) { 45 PetscInt n; 46 47 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 49 for (i=0;i<n;i++) vals[i] = 0.; 50 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 51 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 54 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 55 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 56 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 57 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 58 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 59 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 60 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */ 61 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 62 } 63 ierr = PetscFree(vals);CHKERRQ(ierr); 64 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 65 } 66 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 67 68 /* check PCBDDCBenignGetOrSetP0 */ 69 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 70 pcbddc->benign_p0 = -PetscGlobalRank; 71 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 72 pcbddc->benign_p0 = 1; 73 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 74 if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) { 75 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr); 76 } 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 83 { 84 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 85 IS zerodiag; 86 PetscInt nz; 87 PetscBool sorted; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 92 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 93 pcbddc->benign_original_mat = pcbddc->local_mat; 94 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 95 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 96 if (!sorted) { 97 ierr = ISSort(zerodiag);CHKERRQ(ierr); 98 } 99 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 100 if (nz) { 101 IS zerodiagc; 102 PetscScalar *array; 103 const PetscInt *idxs,*idxsc; 104 PetscInt i,n,*nnz; 105 106 /* TODO: add check for shared dofs and raise error */ 107 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 108 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 109 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 110 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 111 /* local change of basis for pressures */ 112 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 113 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 114 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 115 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 116 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 117 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 118 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 119 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 120 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 121 ierr = PetscFree(nnz);CHKERRQ(ierr); 122 /* set identity on velocities */ 123 for (i=0;i<n-nz;i++) { 124 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 125 } 126 /* set change on pressures */ 127 for (i=0;i<nz-1;i++) { 128 PetscScalar vals[2]; 129 PetscInt cols[2]; 130 131 /* TODO: add quadrature */ 132 cols[0] = idxs[i]; 133 cols[1] = idxs[nz-1]; 134 vals[0] = 1.; 135 vals[1] = 1./nz; 136 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 137 } 138 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 139 for (i=0;i<nz-1;i++) array[i] = -1.; 140 array[nz-1] = 1./nz; 141 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 142 ierr = PetscFree(array);CHKERRQ(ierr); 143 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 144 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 145 /* TODO: need optimization? */ 146 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 147 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 148 /* store local and global idxs for p0 */ 149 pcbddc->benign_p0_lidx = idxs[nz-1]; 150 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 151 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 152 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 153 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 154 } else { /* this is unlikely to happen but, just in case, destroy the empty IS */ 155 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 156 } 157 *zerodiaglocal = zerodiag; 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNCT__ 162 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 163 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 164 { 165 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 166 PetscErrorCode ierr; 167 168 PetscFunctionBegin; 169 if (!pcbddc->benign_sf) { 170 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 171 if (pcbddc->benign_p0_gidx >= 0) { 172 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 173 } else { 174 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 175 } 176 } 177 if (get) { /* use SF to get values */ 178 PetscScalar *array; 179 180 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 181 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 182 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 183 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 184 } else { /* use VecSetValue */ 185 if (pcbddc->benign_p0_gidx >= 0) { 186 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 187 } 188 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 189 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 196 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 197 { 198 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 199 PetscErrorCode ierr; 200 201 PetscFunctionBegin; 202 /* TODO: add error checking 203 - avoid nested pop (or push) calls. 204 - cannot push before pop. 205 - cannot call this if pcbddc->local_mat is NULL 206 */ 207 if (pcbddc->benign_p0_lidx < 0) { 208 PetscFunctionReturn(0); 209 } 210 if (pop) { 211 const PetscInt *cB0_cols; 212 PetscInt cB0_ncol; 213 const PetscScalar *cB0_vals; 214 215 /* extract B_0 */ 216 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 217 pcbddc->B0_ncol = cB0_ncol; 218 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 219 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 220 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 221 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 222 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 223 /* remove rows and cols from local problem */ 224 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 225 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 226 } else { /* push */ 227 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 228 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 229 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 230 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 231 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 } 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "PCBDDCAdaptiveSelection" 238 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 239 { 240 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 241 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 242 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 243 PetscBLASInt *B_iwork,*B_ifail; 244 PetscScalar *work,lwork; 245 PetscScalar *St,*S,*eigv; 246 PetscScalar *Sarray,*Starray; 247 PetscReal *eigs,thresh; 248 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 249 PetscBool allocated_S_St; 250 #if defined(PETSC_USE_COMPLEX) 251 PetscReal *rwork; 252 #endif 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (!sub_schurs->use_mumps) { 257 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 258 } 259 260 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 261 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 262 } 263 264 if (pcbddc->dbg_flag) { 265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 268 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 269 } 270 271 if (pcbddc->dbg_flag) { 272 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 273 } 274 275 /* max size of subsets */ 276 mss = 0; 277 for (i=0;i<sub_schurs->n_subs;i++) { 278 PetscInt subset_size; 279 280 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 281 mss = PetscMax(mss,subset_size); 282 } 283 284 /* min/max and threshold */ 285 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 286 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 287 nmax = PetscMax(nmin,nmax); 288 allocated_S_St = PETSC_FALSE; 289 if (nmin) { 290 allocated_S_St = PETSC_TRUE; 291 } 292 293 /* allocate lapack workspace */ 294 cum = cum2 = 0; 295 maxneigs = 0; 296 for (i=0;i<sub_schurs->n_subs;i++) { 297 PetscInt n,subset_size; 298 299 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 300 n = PetscMin(subset_size,nmax); 301 cum += subset_size; 302 cum2 += subset_size*n; 303 maxneigs = PetscMax(maxneigs,n); 304 } 305 if (mss) { 306 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 307 PetscBLASInt B_itype = 1; 308 PetscBLASInt B_N = mss; 309 PetscReal zero = 0.0; 310 PetscReal eps = 0.0; /* dlamch? */ 311 312 B_lwork = -1; 313 S = NULL; 314 St = NULL; 315 eigs = NULL; 316 eigv = NULL; 317 B_iwork = NULL; 318 B_ifail = NULL; 319 #if defined(PETSC_USE_COMPLEX) 320 rwork = NULL; 321 #endif 322 thresh = 1.0; 323 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 324 #if defined(PETSC_USE_COMPLEX) 325 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 326 #else 327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 328 #endif 329 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 330 ierr = PetscFPTrapPop();CHKERRQ(ierr); 331 } else { 332 /* TODO */ 333 } 334 } else { 335 lwork = 0; 336 } 337 338 nv = 0; 339 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 340 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 341 } 342 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 343 if (allocated_S_St) { 344 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 345 } 346 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 347 #if defined(PETSC_USE_COMPLEX) 348 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 349 #endif 350 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 351 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 352 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 353 nv+cum,&pcbddc->adaptive_constraints_idxs, 354 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 355 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 356 357 maxneigs = 0; 358 cum = cum2 = cumarray = 0; 359 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 360 pcbddc->adaptive_constraints_data_ptr[0] = 0; 361 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 362 const PetscInt *idxs; 363 364 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 365 for (cum=0;cum<nv;cum++) { 366 pcbddc->adaptive_constraints_n[cum] = 1; 367 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 368 pcbddc->adaptive_constraints_data[cum] = 1.0; 369 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 370 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 371 } 372 cum2 = cum; 373 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 374 } 375 376 if (mss) { /* multilevel */ 377 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 378 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 379 } 380 381 for (i=0;i<sub_schurs->n_subs;i++) { 382 383 const PetscInt *idxs; 384 PetscReal infty = PETSC_MAX_REAL; 385 PetscInt j,subset_size,eigs_start = 0; 386 PetscBLASInt B_N; 387 PetscBool same_data = PETSC_FALSE; 388 389 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 390 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 391 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 392 if (sub_schurs->is_hermitian) { 393 PetscInt j,k; 394 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 395 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 396 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 397 } 398 for (j=0;j<subset_size;j++) { 399 for (k=j;k<subset_size;k++) { 400 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 401 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 402 } 403 } 404 } else { 405 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 406 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 407 } 408 } else { 409 S = Sarray + cumarray; 410 St = Starray + cumarray; 411 } 412 413 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 414 /* see if we can save some work */ 415 if (sub_schurs->n_subs == 1) { 416 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 417 } 418 419 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 420 B_neigs = 0; 421 } else { 422 /* Threshold: this is an heuristic for edges */ 423 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 424 425 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 426 PetscBLASInt B_itype = 1; 427 PetscBLASInt B_IL, B_IU; 428 PetscReal eps = -1.0; /* dlamch? */ 429 PetscInt nmin_s; 430 431 if (pcbddc->dbg_flag) { 432 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 433 } 434 435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 436 if (thresh > 1.+PETSC_SMALL) { 437 438 /* ask for eigenvalues larger than thresh */ 439 #if defined(PETSC_USE_COMPLEX) 440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 441 #else 442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 443 #endif 444 } else { 445 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 446 B_IL = 1; 447 #if defined(PETSC_USE_COMPLEX) 448 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 449 #else 450 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 451 #endif 452 } 453 ierr = PetscFPTrapPop();CHKERRQ(ierr); 454 if (B_ierr) { 455 if (B_ierr < 0 ) { 456 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 457 } else if (B_ierr <= B_N) { 458 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 459 } else { 460 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 461 } 462 } 463 464 if (B_neigs > nmax) { 465 if (pcbddc->dbg_flag) { 466 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 467 } 468 eigs_start = B_neigs -nmax; 469 B_neigs = nmax; 470 } 471 472 nmin_s = PetscMin(nmin,B_N); 473 if (B_neigs < nmin_s) { 474 PetscBLASInt B_neigs2; 475 476 B_IU = B_N - B_neigs; 477 B_IL = B_N - nmin_s + 1; 478 if (pcbddc->dbg_flag) { 479 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 480 } 481 if (sub_schurs->is_hermitian) { 482 PetscInt j; 483 for (j=0;j<subset_size;j++) { 484 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 485 } 486 for (j=0;j<subset_size;j++) { 487 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 488 } 489 } else { 490 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 492 } 493 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 494 #if defined(PETSC_USE_COMPLEX) 495 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 496 #else 497 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 498 #endif 499 ierr = PetscFPTrapPop();CHKERRQ(ierr); 500 B_neigs += B_neigs2; 501 } 502 if (B_ierr) { 503 if (B_ierr < 0 ) { 504 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 505 } else if (B_ierr <= B_N) { 506 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 507 } else { 508 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 509 } 510 } 511 if (pcbddc->dbg_flag) { 512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 513 for (j=0;j<B_neigs;j++) { 514 if (eigs[j] == 0.0) { 515 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 516 } else { 517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 518 } 519 } 520 } 521 } else { 522 /* TODO */ 523 } 524 } 525 maxneigs = PetscMax(B_neigs,maxneigs); 526 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 527 if (B_neigs) { 528 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 529 530 if (pcbddc->dbg_flag > 1) { 531 PetscInt ii; 532 for (ii=0;ii<B_neigs;ii++) { 533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 534 for (j=0;j<B_N;j++) { 535 #if defined(PETSC_USE_COMPLEX) 536 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 537 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 539 #else 540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 541 #endif 542 } 543 } 544 } 545 #if 0 546 for (j=0;j<B_neigs;j++) { 547 PetscBLASInt Blas_N,Blas_one = 1.0; 548 PetscScalar norm; 549 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 550 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 551 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 552 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 553 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 554 } else { 555 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 556 } 557 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 558 } 559 #endif 560 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 561 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 562 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 563 cum++; 564 } 565 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 566 /* shift for next computation */ 567 cumarray += subset_size*subset_size; 568 } 569 if (pcbddc->dbg_flag) { 570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 571 } 572 573 if (mss) { 574 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 575 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 576 /* destroy matrices (junk) */ 577 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 578 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 579 } 580 if (allocated_S_St) { 581 ierr = PetscFree2(S,St);CHKERRQ(ierr); 582 } 583 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 584 #if defined(PETSC_USE_COMPLEX) 585 ierr = PetscFree(rwork);CHKERRQ(ierr); 586 #endif 587 if (pcbddc->dbg_flag) { 588 PetscInt maxneigs_r; 589 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 591 } 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "PCBDDCSetUpSolvers" 597 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 598 { 599 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 600 PetscScalar *coarse_submat_vals; 601 PetscErrorCode ierr; 602 603 PetscFunctionBegin; 604 /* Setup local scatters R_to_B and (optionally) R_to_D */ 605 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 606 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 607 608 /* Setup local neumann solver ksp_R */ 609 /* PCBDDCSetUpLocalScatters should be called first! */ 610 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 611 612 /* Change global null space passed in by the user if change of basis has been requested */ 613 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 614 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 615 } 616 617 /* 618 Setup local correction and local part of coarse basis. 619 Gives back the dense local part of the coarse matrix in column major ordering 620 */ 621 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 622 623 /* Compute total number of coarse nodes and setup coarse solver */ 624 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 625 626 /* free */ 627 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "PCBDDCResetCustomization" 633 PetscErrorCode PCBDDCResetCustomization(PC pc) 634 { 635 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 636 PetscErrorCode ierr; 637 638 PetscFunctionBegin; 639 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 640 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 641 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 642 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 643 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 644 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 645 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 646 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 647 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 648 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 649 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "PCBDDCResetTopography" 655 PetscErrorCode PCBDDCResetTopography(PC pc) 656 { 657 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 658 PetscErrorCode ierr; 659 660 PetscFunctionBegin; 661 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 662 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 663 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 664 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 665 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 666 PetscFunctionReturn(0); 667 } 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "PCBDDCResetSolvers" 671 PetscErrorCode PCBDDCResetSolvers(PC pc) 672 { 673 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 674 PetscScalar *array; 675 PetscErrorCode ierr; 676 677 PetscFunctionBegin; 678 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 679 if (pcbddc->coarse_phi_B) { 680 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 681 ierr = PetscFree(array);CHKERRQ(ierr); 682 } 683 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 684 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 685 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 687 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 688 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 689 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 690 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 691 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 692 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 693 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 694 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 695 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 697 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 698 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 699 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 700 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 701 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 702 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 703 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 704 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 705 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 707 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 709 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 710 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 716 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 717 { 718 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 719 PC_IS *pcis = (PC_IS*)pc->data; 720 VecType impVecType; 721 PetscInt n_constraints,n_R,old_size,n_benign; 722 PetscErrorCode ierr; 723 724 PetscFunctionBegin; 725 if (!pcbddc->ConstraintMatrix) { 726 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 727 } 728 /* get sizes */ 729 n_benign = 0; 730 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 731 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 732 n_R = pcis->n - pcbddc->n_vertices; 733 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 734 /* local work vectors (try to avoid unneeded work)*/ 735 /* R nodes */ 736 old_size = -1; 737 if (pcbddc->vec1_R) { 738 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 739 } 740 if (n_R != old_size) { 741 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 742 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 743 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 744 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 745 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 746 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 747 } 748 /* local primal dofs */ 749 old_size = -1; 750 if (pcbddc->vec1_P) { 751 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 752 } 753 if (pcbddc->local_primal_size != old_size) { 754 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 755 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 756 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 757 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 758 } 759 /* local explicit constraints */ 760 old_size = -1; 761 if (pcbddc->vec1_C) { 762 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 763 } 764 if (n_constraints && n_constraints != old_size) { 765 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 766 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 767 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 768 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 769 } 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "PCBDDCSetUpCorrection" 775 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 776 { 777 PetscErrorCode ierr; 778 /* pointers to pcis and pcbddc */ 779 PC_IS* pcis = (PC_IS*)pc->data; 780 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 781 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 782 /* submatrices of local problem */ 783 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 784 /* submatrices of benign trick */ 785 Mat B0_V = NULL; 786 /* submatrices of local coarse problem */ 787 Mat S_VV,S_CV,S_VC,S_CC; 788 /* working matrices */ 789 Mat C_CR; 790 /* additional working stuff */ 791 PC pc_R; 792 Mat F,B0 = NULL; 793 PetscBool isLU,isCHOL,isILU; 794 795 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 796 PetscScalar *work; 797 PetscInt *idx_V_B; 798 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 799 PetscInt i,n_R,n_D,n_B; 800 PetscBool unsymmetric_check; 801 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 802 MatType impMatType; 803 /* some shortcuts to scalars */ 804 PetscScalar one=1.0,m_one=-1.0; 805 806 PetscFunctionBegin; 807 n_benign = 0; 808 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 809 n_vertices = pcbddc->n_vertices; 810 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 811 /* Set Non-overlapping dimensions */ 812 n_B = pcis->n_B; 813 n_D = pcis->n - n_B; 814 n_R = pcis->n - n_vertices; 815 816 /* Set types for local objects needed by BDDC precondtioner */ 817 impMatType = MATSEQDENSE; 818 819 /* vertices in boundary numbering */ 820 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 821 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 822 if (i != n_vertices) { 823 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 824 } 825 826 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 827 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 829 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 831 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 833 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 835 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 836 837 unsymmetric_check = PETSC_FALSE; 838 /* allocate workspace */ 839 n = 0; 840 if (n_constraints) { 841 n += n_R*n_constraints; 842 } 843 if (n_vertices) { 844 n = PetscMax(2*n_R*n_vertices,n); 845 n = PetscMax((n_R+n_B)*n_vertices,n); 846 } 847 if (!pcbddc->symmetric_primal) { 848 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 849 unsymmetric_check = PETSC_TRUE; 850 } 851 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 852 853 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 854 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 855 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 856 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 857 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 858 if (isLU || isILU || isCHOL) { 859 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 860 } else if (sub_schurs->reuse_mumps) { 861 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 862 MatFactorType type; 863 864 F = reuse_mumps->F; 865 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 866 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 867 } else { 868 F = NULL; 869 } 870 871 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 872 if (n_constraints) { 873 Mat M1,M2,M3; 874 Mat auxmat; 875 IS is_aux; 876 PetscScalar *array,*array2; 877 878 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 879 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 880 881 /* Extract constraints on R nodes: C_{CR} */ 882 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 883 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 884 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 885 886 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 887 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 888 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 889 for (i=0;i<n_constraints;i++) { 890 const PetscScalar *row_cmat_values; 891 const PetscInt *row_cmat_indices; 892 PetscInt size_of_constraint,j; 893 894 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 895 for (j=0;j<size_of_constraint;j++) { 896 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 897 } 898 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 899 } 900 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 901 if (F) { 902 Mat B; 903 904 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 905 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 906 ierr = MatDestroy(&B);CHKERRQ(ierr); 907 } else { 908 PetscScalar *marr; 909 910 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 911 for (i=0;i<n_constraints;i++) { 912 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 913 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 914 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 915 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 916 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 917 } 918 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 919 } 920 if (!pcbddc->switch_static) { 921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 922 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 923 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 924 for (i=0;i<n_constraints;i++) { 925 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 926 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 927 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 929 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 } 932 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 933 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 934 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 935 } else { 936 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 937 pcbddc->local_auxmat2 = local_auxmat2_R; 938 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 939 } 940 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 941 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 942 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 943 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 944 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 945 if (isCHOL) { 946 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 947 } else { 948 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 949 } 950 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 951 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 952 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 953 ierr = MatDestroy(&M2);CHKERRQ(ierr); 954 ierr = MatDestroy(&M3);CHKERRQ(ierr); 955 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 956 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 957 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 958 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 959 ierr = MatDestroy(&M1);CHKERRQ(ierr); 960 } 961 /* Get submatrices from subdomain matrix */ 962 if (n_benign) { 963 IS dummy; 964 Mat B0_R; 965 PetscReal norm; 966 PetscInt ii[2]; 967 968 ii[0] = 0; 969 ii[1] = pcbddc->B0_ncol; 970 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 971 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 972 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 973 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 974 if (norm > PETSC_SMALL) { 975 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 976 } 977 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 978 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 979 } 980 981 if (n_vertices) { 982 IS is_aux; 983 984 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 985 IS tis; 986 987 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 988 ierr = ISSort(tis);CHKERRQ(ierr); 989 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 990 ierr = ISDestroy(&tis);CHKERRQ(ierr); 991 } else { 992 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 993 } 994 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 995 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 996 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 997 if (n_benign) { 998 IS dummy; 999 1000 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1001 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1002 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1003 } 1004 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1005 } 1006 1007 /* Matrix of coarse basis functions (local) */ 1008 if (pcbddc->coarse_phi_B) { 1009 PetscInt on_B,on_primal,on_D=n_D; 1010 if (pcbddc->coarse_phi_D) { 1011 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1012 } 1013 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1014 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1015 PetscScalar *marray; 1016 1017 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1018 ierr = PetscFree(marray);CHKERRQ(ierr); 1019 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1020 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1021 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1022 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1023 } 1024 } 1025 1026 if (!pcbddc->coarse_phi_B) { 1027 PetscScalar *marray; 1028 1029 n = n_B*pcbddc->local_primal_size; 1030 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1031 n += n_D*pcbddc->local_primal_size; 1032 } 1033 if (!pcbddc->symmetric_primal) { 1034 n *= 2; 1035 } 1036 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1038 n = n_B*pcbddc->local_primal_size; 1039 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1040 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1041 n += n_D*pcbddc->local_primal_size; 1042 } 1043 if (!pcbddc->symmetric_primal) { 1044 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1045 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1046 n = n_B*pcbddc->local_primal_size; 1047 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1048 } 1049 } else { 1050 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1051 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1052 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1053 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1054 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1055 } 1056 } 1057 } 1058 1059 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1060 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1061 const PetscInt *idxs; 1062 1063 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1064 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 1065 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1066 } 1067 1068 /* vertices */ 1069 if (n_vertices) { 1070 1071 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1072 1073 if (n_R) { 1074 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1075 PetscBLASInt B_N,B_one = 1; 1076 PetscScalar *x,*y; 1077 PetscBool isseqaij; 1078 1079 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1080 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1082 if (F) { /* TODO could be optimized for symmetric problems */ 1083 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1084 } else { 1085 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1086 for (i=0;i<n_vertices;i++) { 1087 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1088 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1089 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1091 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1092 } 1093 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1094 } 1095 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1096 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1097 if (n_constraints) { 1098 Mat B; 1099 1100 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1101 for (i=0;i<n_vertices;i++) { 1102 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1103 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1104 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1105 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1107 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1108 } 1109 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1110 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1111 ierr = MatDestroy(&B);CHKERRQ(ierr); 1112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1113 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1114 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1115 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1116 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1117 ierr = MatDestroy(&B);CHKERRQ(ierr); 1118 } 1119 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1120 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1121 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1122 } 1123 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1124 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1125 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1126 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1127 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1128 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1129 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1130 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1131 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1132 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1133 } else { 1134 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1135 } 1136 if (n_benign) { 1137 const PetscScalar *vals; 1138 const PetscInt *idxs; 1139 PetscInt n,j; 1140 1141 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1142 for (j=0;j<n;j++) { 1143 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1144 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1145 } 1146 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1147 } 1148 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1149 1150 /* coarse basis functions */ 1151 for (i=0;i<n_vertices;i++) { 1152 PetscScalar *y; 1153 1154 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1155 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1156 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1157 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1159 y[n_B*i+idx_V_B[i]] = 1.0; 1160 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1161 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1162 1163 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1164 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1165 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1166 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1169 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1170 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1171 } 1172 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1173 } 1174 /* if n_R == 0 the object is not destroyed */ 1175 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1176 } 1177 1178 if (n_constraints) { 1179 Mat B; 1180 1181 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1182 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1183 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1184 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1185 if (n_vertices) { 1186 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1187 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1188 } else { 1189 Mat S_VCt; 1190 1191 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1192 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1193 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1194 } 1195 } 1196 ierr = MatDestroy(&B);CHKERRQ(ierr); 1197 /* coarse basis functions */ 1198 for (i=0;i<n_constraints;i++) { 1199 PetscScalar *y; 1200 1201 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1202 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1203 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1204 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1205 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1206 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1207 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1208 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1209 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1210 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1211 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1214 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1216 } 1217 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1218 } 1219 } 1220 if (n_constraints) { 1221 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1222 } 1223 1224 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1225 1226 /* compute other basis functions for non-symmetric problems */ 1227 if (!pcbddc->symmetric_primal) { 1228 1229 if (n_constraints) { 1230 Mat S_CCT,B_C; 1231 1232 /* this is a lazy thing */ 1233 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1234 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1235 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1236 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1237 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1238 if (n_vertices) { 1239 Mat B_V,S_VCT; 1240 1241 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1242 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1243 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1244 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1245 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1246 } 1247 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1248 } else { /* if there are no constraints, reset work */ 1249 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1250 } 1251 if (n_vertices && n_R) { 1252 Mat A_VRT; 1253 PetscScalar *marray; 1254 PetscBLASInt B_N,B_one = 1; 1255 1256 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1257 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1258 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1259 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1260 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1261 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1262 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1263 } 1264 1265 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1266 for (i=0;i<pcbddc->local_primal_size;i++) { 1267 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1268 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1269 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1270 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1271 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1272 } 1273 } else { 1274 for (i=0;i<pcbddc->local_primal_size;i++) { 1275 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1276 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1277 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1278 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1279 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1280 } 1281 } 1282 /* coarse basis functions */ 1283 for (i=0;i<pcbddc->local_primal_size;i++) { 1284 PetscScalar *y; 1285 1286 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1287 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1288 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1289 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1290 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1291 if (i<n_vertices) { 1292 y[n_B*i+idx_V_B[i]] = 1.0; 1293 } 1294 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1295 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1296 1297 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1298 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1299 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1300 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1301 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1302 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1303 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1304 } 1305 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1306 } 1307 } 1308 /* free memory */ 1309 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1310 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1311 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1312 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1313 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1314 ierr = PetscFree(work);CHKERRQ(ierr); 1315 if (n_vertices) { 1316 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1317 } 1318 if (n_constraints) { 1319 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1320 } 1321 /* Checking coarse_sub_mat and coarse basis functios */ 1322 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1323 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1324 if (pcbddc->dbg_flag) { 1325 Mat coarse_sub_mat; 1326 Mat AUXMAT,TM1,TM2,TM3,TM4; 1327 Mat coarse_phi_D,coarse_phi_B; 1328 Mat coarse_psi_D,coarse_psi_B; 1329 Mat A_II,A_BB,A_IB,A_BI; 1330 Mat C_B,CPHI; 1331 IS is_dummy; 1332 Vec mones; 1333 MatType checkmattype=MATSEQAIJ; 1334 PetscReal real_value; 1335 1336 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1337 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1338 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1339 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1340 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1341 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1342 if (unsymmetric_check) { 1343 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1344 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1345 } 1346 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1347 1348 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1349 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1350 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1351 if (unsymmetric_check) { 1352 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1353 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1354 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1355 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1356 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1357 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1358 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1359 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1360 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1361 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1362 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1363 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1364 } else { 1365 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1366 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1367 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1368 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1369 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1370 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1371 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1372 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1373 } 1374 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1375 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1376 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1377 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1378 if (n_benign) { 1379 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1380 PetscScalar *data,*data2; 1381 1382 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1383 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1384 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1385 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1386 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1387 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1388 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1389 for (i=0;i<pcbddc->local_primal_size;i++) { 1390 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1391 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1392 } 1393 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1394 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1395 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1396 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1397 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1398 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1399 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1400 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1401 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1402 } 1403 #if 0 1404 { 1405 PetscViewer viewer; 1406 char filename[256]; 1407 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1408 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1409 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1410 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1412 } 1413 #endif 1414 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1415 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1418 1419 /* check constraints */ 1420 if (!n_benign) { 1421 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1422 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1423 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1424 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1425 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1426 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1427 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1429 if (unsymmetric_check) { 1430 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1431 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1432 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1433 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1435 } 1436 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1437 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1438 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1439 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1440 } 1441 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1442 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1443 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1444 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1445 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1446 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1447 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1448 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1449 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1450 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1451 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1452 if (unsymmetric_check) { 1453 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1454 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1455 } 1456 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1457 } 1458 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1459 /* get back data */ 1460 *coarse_submat_vals_n = coarse_submat_vals; 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1466 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1467 { 1468 Mat *work_mat; 1469 IS isrow_s,iscol_s; 1470 PetscBool rsorted,csorted; 1471 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1472 PetscErrorCode ierr; 1473 1474 PetscFunctionBegin; 1475 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1476 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1477 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1478 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1479 1480 if (!rsorted) { 1481 const PetscInt *idxs; 1482 PetscInt *idxs_sorted,i; 1483 1484 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1485 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1486 for (i=0;i<rsize;i++) { 1487 idxs_perm_r[i] = i; 1488 } 1489 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1490 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1491 for (i=0;i<rsize;i++) { 1492 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1493 } 1494 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1495 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1496 } else { 1497 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1498 isrow_s = isrow; 1499 } 1500 1501 if (!csorted) { 1502 if (isrow == iscol) { 1503 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1504 iscol_s = isrow_s; 1505 } else { 1506 const PetscInt *idxs; 1507 PetscInt *idxs_sorted,i; 1508 1509 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1510 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1511 for (i=0;i<csize;i++) { 1512 idxs_perm_c[i] = i; 1513 } 1514 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1515 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1516 for (i=0;i<csize;i++) { 1517 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1518 } 1519 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1520 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1521 } 1522 } else { 1523 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1524 iscol_s = iscol; 1525 } 1526 1527 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1528 1529 if (!rsorted || !csorted) { 1530 Mat new_mat; 1531 IS is_perm_r,is_perm_c; 1532 1533 if (!rsorted) { 1534 PetscInt *idxs_r,i; 1535 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1536 for (i=0;i<rsize;i++) { 1537 idxs_r[idxs_perm_r[i]] = i; 1538 } 1539 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1540 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1541 } else { 1542 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1543 } 1544 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1545 1546 if (!csorted) { 1547 if (isrow_s == iscol_s) { 1548 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1549 is_perm_c = is_perm_r; 1550 } else { 1551 PetscInt *idxs_c,i; 1552 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1553 for (i=0;i<csize;i++) { 1554 idxs_c[idxs_perm_c[i]] = i; 1555 } 1556 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1557 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1558 } 1559 } else { 1560 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1561 } 1562 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1563 1564 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1565 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1566 work_mat[0] = new_mat; 1567 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1568 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1569 } 1570 1571 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1572 *B = work_mat[0]; 1573 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1574 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1575 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1581 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1582 { 1583 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1584 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1585 Mat new_mat; 1586 IS is_local,is_global; 1587 PetscInt local_size; 1588 PetscBool isseqaij; 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1593 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1594 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1595 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1596 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1597 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1598 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1599 1600 /* check */ 1601 if (pcbddc->dbg_flag) { 1602 Vec x,x_change; 1603 PetscReal error; 1604 1605 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1606 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1607 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1608 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1610 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1611 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1612 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1613 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1614 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1615 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1616 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1617 ierr = VecDestroy(&x);CHKERRQ(ierr); 1618 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1619 } 1620 1621 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1622 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1623 if (isseqaij) { 1624 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1625 } else { 1626 Mat work_mat; 1627 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1628 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1629 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1630 } 1631 if (matis->A->symmetric_set) { 1632 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1633 #if !defined(PETSC_USE_COMPLEX) 1634 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1635 #endif 1636 } 1637 /* 1638 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1639 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1640 */ 1641 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1647 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1648 { 1649 PC_IS* pcis = (PC_IS*)(pc->data); 1650 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1651 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1652 PetscInt *idx_R_local=NULL; 1653 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1654 PetscInt vbs,bs; 1655 PetscBT bitmask=NULL; 1656 PetscErrorCode ierr; 1657 1658 PetscFunctionBegin; 1659 /* 1660 No need to setup local scatters if 1661 - primal space is unchanged 1662 AND 1663 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1664 AND 1665 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1666 */ 1667 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1668 PetscFunctionReturn(0); 1669 } 1670 /* destroy old objects */ 1671 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1672 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1673 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1674 /* Set Non-overlapping dimensions */ 1675 n_B = pcis->n_B; 1676 n_D = pcis->n - n_B; 1677 n_vertices = pcbddc->n_vertices; 1678 1679 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1680 1681 /* create auxiliary bitmask and allocate workspace */ 1682 if (!sub_schurs->reuse_mumps) { 1683 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1684 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1685 for (i=0;i<n_vertices;i++) { 1686 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1687 } 1688 1689 for (i=0, n_R=0; i<pcis->n; i++) { 1690 if (!PetscBTLookup(bitmask,i)) { 1691 idx_R_local[n_R++] = i; 1692 } 1693 } 1694 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1695 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1696 1697 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1698 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1699 } 1700 1701 /* Block code */ 1702 vbs = 1; 1703 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1704 if (bs>1 && !(n_vertices%bs)) { 1705 PetscBool is_blocked = PETSC_TRUE; 1706 PetscInt *vary; 1707 if (!sub_schurs->reuse_mumps) { 1708 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1709 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1710 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1711 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1712 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1713 for (i=0; i<pcis->n/bs; i++) { 1714 if (vary[i]!=0 && vary[i]!=bs) { 1715 is_blocked = PETSC_FALSE; 1716 break; 1717 } 1718 } 1719 ierr = PetscFree(vary);CHKERRQ(ierr); 1720 } else { 1721 /* Verify directly the R set */ 1722 for (i=0; i<n_R/bs; i++) { 1723 PetscInt j,node=idx_R_local[bs*i]; 1724 for (j=1; j<bs; j++) { 1725 if (node != idx_R_local[bs*i+j]-j) { 1726 is_blocked = PETSC_FALSE; 1727 break; 1728 } 1729 } 1730 } 1731 } 1732 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1733 vbs = bs; 1734 for (i=0;i<n_R/vbs;i++) { 1735 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1736 } 1737 } 1738 } 1739 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1740 if (sub_schurs->reuse_mumps) { 1741 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1742 1743 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1744 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1745 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1746 reuse_mumps->is_R = pcbddc->is_R_local; 1747 } else { 1748 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1749 } 1750 1751 /* print some info if requested */ 1752 if (pcbddc->dbg_flag) { 1753 PetscInt benign = 0; 1754 1755 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1757 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1758 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1759 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1760 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1761 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1762 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1763 } 1764 1765 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1766 if (!sub_schurs->reuse_mumps) { 1767 IS is_aux1,is_aux2; 1768 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1769 1770 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1771 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1772 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1773 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1774 for (i=0; i<n_D; i++) { 1775 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1776 } 1777 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1778 for (i=0, j=0; i<n_R; i++) { 1779 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1780 aux_array1[j++] = i; 1781 } 1782 } 1783 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1784 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1785 for (i=0, j=0; i<n_B; i++) { 1786 if (!PetscBTLookup(bitmask,is_indices[i])) { 1787 aux_array2[j++] = i; 1788 } 1789 } 1790 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1791 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1792 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1793 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1794 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1795 1796 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1797 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1798 for (i=0, j=0; i<n_R; i++) { 1799 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1800 aux_array1[j++] = i; 1801 } 1802 } 1803 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1804 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1805 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1806 } 1807 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1808 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1809 } else { 1810 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1811 IS tis; 1812 PetscInt schur_size; 1813 1814 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1815 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1816 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1817 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1818 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1819 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1820 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1821 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1822 } 1823 } 1824 PetscFunctionReturn(0); 1825 } 1826 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1830 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1831 { 1832 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1833 PC_IS *pcis = (PC_IS*)pc->data; 1834 PC pc_temp; 1835 Mat A_RR; 1836 MatReuse reuse; 1837 PetscScalar m_one = -1.0; 1838 PetscReal value; 1839 PetscInt n_D,n_R; 1840 PetscBool use_exact,use_exact_reduced,issbaij; 1841 PetscErrorCode ierr; 1842 /* prefixes stuff */ 1843 char dir_prefix[256],neu_prefix[256],str_level[16]; 1844 size_t len; 1845 1846 PetscFunctionBegin; 1847 1848 /* compute prefixes */ 1849 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1850 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1851 if (!pcbddc->current_level) { 1852 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1853 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1854 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1855 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1856 } else { 1857 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1858 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1859 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1860 len -= 15; /* remove "pc_bddc_coarse_" */ 1861 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1862 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1863 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1864 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1865 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1866 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1867 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1868 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1869 } 1870 1871 /* DIRICHLET PROBLEM */ 1872 if (dirichlet) { 1873 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1874 if (pcbddc->local_mat->symmetric_set) { 1875 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1876 } 1877 /* Matrix for Dirichlet problem is pcis->A_II */ 1878 n_D = pcis->n - pcis->n_B; 1879 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1880 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1881 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1882 /* default */ 1883 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1884 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1885 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1886 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1887 if (issbaij) { 1888 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1889 } else { 1890 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1891 } 1892 /* Allow user's customization */ 1893 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1894 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1895 } 1896 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1897 if (sub_schurs->reuse_mumps) { 1898 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1899 1900 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1901 } 1902 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1903 if (!n_D) { 1904 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1905 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1906 } 1907 /* Set Up KSP for Dirichlet problem of BDDC */ 1908 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1909 /* set ksp_D into pcis data */ 1910 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1911 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1912 pcis->ksp_D = pcbddc->ksp_D; 1913 } 1914 1915 /* NEUMANN PROBLEM */ 1916 A_RR = 0; 1917 if (neumann) { 1918 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1919 PetscInt ibs,mbs; 1920 PetscBool issbaij; 1921 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1922 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1923 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1924 if (pcbddc->ksp_R) { /* already created ksp */ 1925 PetscInt nn_R; 1926 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1927 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1928 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1929 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1930 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1931 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1932 reuse = MAT_INITIAL_MATRIX; 1933 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1934 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1935 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1936 reuse = MAT_INITIAL_MATRIX; 1937 } else { /* safe to reuse the matrix */ 1938 reuse = MAT_REUSE_MATRIX; 1939 } 1940 } 1941 /* last check */ 1942 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1943 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1944 reuse = MAT_INITIAL_MATRIX; 1945 } 1946 } else { /* first time, so we need to create the matrix */ 1947 reuse = MAT_INITIAL_MATRIX; 1948 } 1949 /* extract A_RR */ 1950 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1951 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1952 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1953 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1954 if (matis->A == pcbddc->local_mat) { 1955 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1956 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1957 } else { 1958 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1959 } 1960 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1961 if (matis->A == pcbddc->local_mat) { 1962 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1963 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1964 } else { 1965 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1966 } 1967 } 1968 if (!sub_schurs->reuse_mumps) { 1969 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1970 if (pcbddc->local_mat->symmetric_set) { 1971 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1972 } 1973 } else { 1974 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1975 1976 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1977 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1978 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1979 } 1980 if (!pcbddc->ksp_R) { /* create object if not present */ 1981 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1982 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1983 /* default */ 1984 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1985 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1986 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1987 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1988 if (issbaij) { 1989 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1990 } else { 1991 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1992 } 1993 /* Allow user's customization */ 1994 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1995 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1996 } 1997 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1998 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1999 if (!n_R) { 2000 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2001 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2002 } 2003 /* Reuse MUMPS solver if it is present */ 2004 if (sub_schurs->reuse_mumps) { 2005 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2006 2007 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2008 } 2009 /* Set Up KSP for Neumann problem of BDDC */ 2010 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2011 } 2012 /* free Neumann problem's matrix */ 2013 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2014 2015 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2016 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2017 if (pcbddc->dbg_flag) { 2018 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2019 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2020 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2021 } 2022 if (dirichlet) { /* Dirichlet */ 2023 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2024 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2025 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2026 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2027 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2028 /* need to be adapted? */ 2029 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2030 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2031 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2032 /* print info */ 2033 if (pcbddc->dbg_flag) { 2034 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2035 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2036 } 2037 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2038 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2039 } 2040 } 2041 if (neumann) { /* Neumann */ 2042 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2043 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2044 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2045 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2046 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2047 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2048 /* need to be adapted? */ 2049 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2050 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2051 /* print info */ 2052 if (pcbddc->dbg_flag) { 2053 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2055 } 2056 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2057 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2058 } 2059 } 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 #undef __FUNCT__ 2065 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2066 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2067 { 2068 PetscErrorCode ierr; 2069 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2070 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2071 2072 PetscFunctionBegin; 2073 if (!sub_schurs->reuse_mumps) { 2074 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2075 } 2076 if (!pcbddc->switch_static) { 2077 if (applytranspose && pcbddc->local_auxmat1) { 2078 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2079 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2080 } 2081 if (!sub_schurs->reuse_mumps) { 2082 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2083 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2084 } else { 2085 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2086 2087 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2088 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2089 } 2090 } else { 2091 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2092 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2093 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2095 if (applytranspose && pcbddc->local_auxmat1) { 2096 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2097 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2098 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2099 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2100 } 2101 } 2102 if (!sub_schurs->reuse_mumps) { 2103 if (applytranspose) { 2104 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2105 } else { 2106 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2107 } 2108 #if defined(PETSC_HAVE_MUMPS) 2109 } else { 2110 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2111 2112 if (applytranspose) { 2113 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2114 } else { 2115 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2116 } 2117 #endif 2118 } 2119 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2120 if (!pcbddc->switch_static) { 2121 if (!sub_schurs->reuse_mumps) { 2122 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2123 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 } else { 2125 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2126 2127 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2128 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2129 } 2130 if (!applytranspose && pcbddc->local_auxmat1) { 2131 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2132 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2133 } 2134 } else { 2135 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2137 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2138 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2139 if (!applytranspose && pcbddc->local_auxmat1) { 2140 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2141 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2142 } 2143 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2144 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2145 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2146 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2147 } 2148 PetscFunctionReturn(0); 2149 } 2150 2151 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2152 #undef __FUNCT__ 2153 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2154 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2155 { 2156 PetscErrorCode ierr; 2157 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2158 PC_IS* pcis = (PC_IS*) (pc->data); 2159 const PetscScalar zero = 0.0; 2160 2161 PetscFunctionBegin; 2162 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2163 if (applytranspose) { 2164 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2165 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2166 } else { 2167 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2168 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2169 } 2170 2171 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2172 if (pcbddc->benign_p0_lidx >= 0) { 2173 PetscScalar *array; 2174 2175 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2176 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2177 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2178 } 2179 2180 /* start communications from local primal nodes to rhs of coarse solver */ 2181 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2182 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2184 2185 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2186 /* TODO remove null space when doing multilevel */ 2187 if (pcbddc->coarse_ksp) { 2188 Vec rhs,sol; 2189 2190 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2191 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2192 if (applytranspose) { 2193 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2194 } else { 2195 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2196 } 2197 } 2198 2199 /* Local solution on R nodes */ 2200 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2201 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2202 } 2203 2204 /* communications from coarse sol to local primal nodes */ 2205 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2206 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2207 2208 /* Sum contributions from two levels */ 2209 if (applytranspose) { 2210 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2211 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2212 } else { 2213 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2214 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2215 } 2216 /* store p0 */ 2217 if (pcbddc->benign_p0_lidx >= 0) { 2218 PetscScalar *array; 2219 2220 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2221 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2222 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2223 } 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2229 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2230 { 2231 PetscErrorCode ierr; 2232 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2233 PetscScalar *array; 2234 Vec from,to; 2235 2236 PetscFunctionBegin; 2237 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2238 from = pcbddc->coarse_vec; 2239 to = pcbddc->vec1_P; 2240 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2241 Vec tvec; 2242 2243 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2244 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2245 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2246 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2247 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2248 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2249 } 2250 } else { /* from local to global -> put data in coarse right hand side */ 2251 from = pcbddc->vec1_P; 2252 to = pcbddc->coarse_vec; 2253 } 2254 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2260 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2261 { 2262 PetscErrorCode ierr; 2263 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2264 PetscScalar *array; 2265 Vec from,to; 2266 2267 PetscFunctionBegin; 2268 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2269 from = pcbddc->coarse_vec; 2270 to = pcbddc->vec1_P; 2271 } else { /* from local to global -> put data in coarse right hand side */ 2272 from = pcbddc->vec1_P; 2273 to = pcbddc->coarse_vec; 2274 } 2275 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2276 if (smode == SCATTER_FORWARD) { 2277 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2278 Vec tvec; 2279 2280 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2281 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2282 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2283 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2284 } 2285 } else { 2286 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2287 ierr = VecResetArray(from);CHKERRQ(ierr); 2288 } 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 /* uncomment for testing purposes */ 2294 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2297 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2298 { 2299 PetscErrorCode ierr; 2300 PC_IS* pcis = (PC_IS*)(pc->data); 2301 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2302 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2303 /* one and zero */ 2304 PetscScalar one=1.0,zero=0.0; 2305 /* space to store constraints and their local indices */ 2306 PetscScalar *constraints_data; 2307 PetscInt *constraints_idxs,*constraints_idxs_B; 2308 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2309 PetscInt *constraints_n; 2310 /* iterators */ 2311 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2312 /* BLAS integers */ 2313 PetscBLASInt lwork,lierr; 2314 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2315 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2316 /* reuse */ 2317 PetscInt olocal_primal_size,olocal_primal_size_cc; 2318 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2319 /* change of basis */ 2320 PetscBool qr_needed; 2321 PetscBT change_basis,qr_needed_idx; 2322 /* auxiliary stuff */ 2323 PetscInt *nnz,*is_indices; 2324 PetscInt ncc; 2325 /* some quantities */ 2326 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2327 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2328 2329 PetscFunctionBegin; 2330 /* Destroy Mat objects computed previously */ 2331 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2332 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2333 /* save info on constraints from previous setup (if any) */ 2334 olocal_primal_size = pcbddc->local_primal_size; 2335 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2336 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2337 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2338 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2339 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2340 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2341 2342 /* print some info */ 2343 if (pcbddc->dbg_flag) { 2344 IS vertices; 2345 PetscInt nv,nedges,nfaces; 2346 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2347 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2348 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2349 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2354 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2355 } 2356 2357 if (!pcbddc->adaptive_selection) { 2358 IS ISForVertices,*ISForFaces,*ISForEdges; 2359 MatNullSpace nearnullsp; 2360 const Vec *nearnullvecs; 2361 Vec *localnearnullsp; 2362 PetscScalar *array; 2363 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2364 PetscBool nnsp_has_cnst; 2365 /* LAPACK working arrays for SVD or POD */ 2366 PetscBool skip_lapack,boolforchange; 2367 PetscScalar *work; 2368 PetscReal *singular_vals; 2369 #if defined(PETSC_USE_COMPLEX) 2370 PetscReal *rwork; 2371 #endif 2372 #if defined(PETSC_MISSING_LAPACK_GESVD) 2373 PetscScalar *temp_basis,*correlation_mat; 2374 #else 2375 PetscBLASInt dummy_int=1; 2376 PetscScalar dummy_scalar=1.; 2377 #endif 2378 2379 /* Get index sets for faces, edges and vertices from graph */ 2380 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2381 /* free unneeded index sets */ 2382 if (!pcbddc->use_vertices) { 2383 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2384 } 2385 if (!pcbddc->use_edges) { 2386 for (i=0;i<n_ISForEdges;i++) { 2387 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2388 } 2389 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2390 n_ISForEdges = 0; 2391 } 2392 if (!pcbddc->use_faces) { 2393 for (i=0;i<n_ISForFaces;i++) { 2394 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2395 } 2396 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2397 n_ISForFaces = 0; 2398 } 2399 2400 #if defined(PETSC_USE_DEBUG) 2401 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2402 Also use_change_of_basis should be consistent among processors */ 2403 if (pcbddc->NullSpace) { 2404 PetscBool tbool[2],gbool[2]; 2405 2406 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2407 pcbddc->use_change_of_basis = PETSC_TRUE; 2408 if (!ISForEdges) { 2409 pcbddc->use_change_on_faces = PETSC_TRUE; 2410 } 2411 } 2412 tbool[0] = pcbddc->use_change_of_basis; 2413 tbool[1] = pcbddc->use_change_on_faces; 2414 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2415 pcbddc->use_change_of_basis = gbool[0]; 2416 pcbddc->use_change_on_faces = gbool[1]; 2417 } 2418 #endif 2419 2420 /* check if near null space is attached to global mat */ 2421 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2422 if (nearnullsp) { 2423 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2424 /* remove any stored info */ 2425 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2426 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2427 /* store information for BDDC solver reuse */ 2428 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2429 pcbddc->onearnullspace = nearnullsp; 2430 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2431 for (i=0;i<nnsp_size;i++) { 2432 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2433 } 2434 } else { /* if near null space is not provided BDDC uses constants by default */ 2435 nnsp_size = 0; 2436 nnsp_has_cnst = PETSC_TRUE; 2437 } 2438 /* get max number of constraints on a single cc */ 2439 max_constraints = nnsp_size; 2440 if (nnsp_has_cnst) max_constraints++; 2441 2442 /* 2443 Evaluate maximum storage size needed by the procedure 2444 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2445 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2446 There can be multiple constraints per connected component 2447 */ 2448 n_vertices = 0; 2449 if (ISForVertices) { 2450 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2451 } 2452 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2453 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2454 2455 total_counts = n_ISForFaces+n_ISForEdges; 2456 total_counts *= max_constraints; 2457 total_counts += n_vertices; 2458 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2459 2460 total_counts = 0; 2461 max_size_of_constraint = 0; 2462 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2463 IS used_is; 2464 if (i<n_ISForEdges) { 2465 used_is = ISForEdges[i]; 2466 } else { 2467 used_is = ISForFaces[i-n_ISForEdges]; 2468 } 2469 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2470 total_counts += j; 2471 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2472 } 2473 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2474 2475 /* get local part of global near null space vectors */ 2476 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2477 for (k=0;k<nnsp_size;k++) { 2478 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2479 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2480 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2481 } 2482 2483 /* whether or not to skip lapack calls */ 2484 skip_lapack = PETSC_TRUE; 2485 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2486 2487 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2488 if (!skip_lapack) { 2489 PetscScalar temp_work; 2490 2491 #if defined(PETSC_MISSING_LAPACK_GESVD) 2492 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2493 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2495 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2496 #if defined(PETSC_USE_COMPLEX) 2497 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2498 #endif 2499 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2500 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2502 lwork = -1; 2503 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2504 #if !defined(PETSC_USE_COMPLEX) 2505 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2506 #else 2507 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2508 #endif 2509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2510 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2511 #else /* on missing GESVD */ 2512 /* SVD */ 2513 PetscInt max_n,min_n; 2514 max_n = max_size_of_constraint; 2515 min_n = max_constraints; 2516 if (max_size_of_constraint < max_constraints) { 2517 min_n = max_size_of_constraint; 2518 max_n = max_constraints; 2519 } 2520 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2521 #if defined(PETSC_USE_COMPLEX) 2522 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2523 #endif 2524 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2525 lwork = -1; 2526 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2530 #if !defined(PETSC_USE_COMPLEX) 2531 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2532 #else 2533 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2534 #endif 2535 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2536 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2537 #endif /* on missing GESVD */ 2538 /* Allocate optimal workspace */ 2539 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2540 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2541 } 2542 /* Now we can loop on constraining sets */ 2543 total_counts = 0; 2544 constraints_idxs_ptr[0] = 0; 2545 constraints_data_ptr[0] = 0; 2546 /* vertices */ 2547 if (n_vertices) { 2548 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2549 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2550 for (i=0;i<n_vertices;i++) { 2551 constraints_n[total_counts] = 1; 2552 constraints_data[total_counts] = 1.0; 2553 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2554 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2555 total_counts++; 2556 } 2557 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2558 n_vertices = total_counts; 2559 } 2560 2561 /* edges and faces */ 2562 total_counts_cc = total_counts; 2563 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2564 IS used_is; 2565 PetscBool idxs_copied = PETSC_FALSE; 2566 2567 if (ncc<n_ISForEdges) { 2568 used_is = ISForEdges[ncc]; 2569 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2570 } else { 2571 used_is = ISForFaces[ncc-n_ISForEdges]; 2572 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2573 } 2574 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2575 2576 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2577 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2578 /* change of basis should not be performed on local periodic nodes */ 2579 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2580 if (nnsp_has_cnst) { 2581 PetscScalar quad_value; 2582 2583 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2584 idxs_copied = PETSC_TRUE; 2585 2586 if (!pcbddc->use_nnsp_true) { 2587 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2588 } else { 2589 quad_value = 1.0; 2590 } 2591 for (j=0;j<size_of_constraint;j++) { 2592 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2593 } 2594 temp_constraints++; 2595 total_counts++; 2596 } 2597 for (k=0;k<nnsp_size;k++) { 2598 PetscReal real_value; 2599 PetscScalar *ptr_to_data; 2600 2601 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2602 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2603 for (j=0;j<size_of_constraint;j++) { 2604 ptr_to_data[j] = array[is_indices[j]]; 2605 } 2606 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2607 /* check if array is null on the connected component */ 2608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2609 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2610 if (real_value > 0.0) { /* keep indices and values */ 2611 temp_constraints++; 2612 total_counts++; 2613 if (!idxs_copied) { 2614 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2615 idxs_copied = PETSC_TRUE; 2616 } 2617 } 2618 } 2619 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2620 valid_constraints = temp_constraints; 2621 if (!pcbddc->use_nnsp_true && temp_constraints) { 2622 if (temp_constraints == 1) { /* just normalize the constraint */ 2623 PetscScalar norm,*ptr_to_data; 2624 2625 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2626 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2627 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2628 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2629 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2630 } else { /* perform SVD */ 2631 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2632 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2633 2634 #if defined(PETSC_MISSING_LAPACK_GESVD) 2635 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2636 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2637 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2638 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2639 from that computed using LAPACKgesvd 2640 -> This is due to a different computation of eigenvectors in LAPACKheev 2641 -> The quality of the POD-computed basis will be the same */ 2642 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2643 /* Store upper triangular part of correlation matrix */ 2644 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2645 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2646 for (j=0;j<temp_constraints;j++) { 2647 for (k=0;k<j+1;k++) { 2648 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2649 } 2650 } 2651 /* compute eigenvalues and eigenvectors of correlation matrix */ 2652 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2653 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2654 #if !defined(PETSC_USE_COMPLEX) 2655 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2656 #else 2657 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2658 #endif 2659 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2660 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2661 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2662 j = 0; 2663 while (j < temp_constraints && singular_vals[j] < tol) j++; 2664 total_counts = total_counts-j; 2665 valid_constraints = temp_constraints-j; 2666 /* scale and copy POD basis into used quadrature memory */ 2667 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2668 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2669 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2670 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2671 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2673 if (j<temp_constraints) { 2674 PetscInt ii; 2675 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2676 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2677 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2678 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2679 for (k=0;k<temp_constraints-j;k++) { 2680 for (ii=0;ii<size_of_constraint;ii++) { 2681 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2682 } 2683 } 2684 } 2685 #else /* on missing GESVD */ 2686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2687 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2689 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2690 #if !defined(PETSC_USE_COMPLEX) 2691 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2692 #else 2693 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2694 #endif 2695 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2696 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2697 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2698 k = temp_constraints; 2699 if (k > size_of_constraint) k = size_of_constraint; 2700 j = 0; 2701 while (j < k && singular_vals[k-j-1] < tol) j++; 2702 valid_constraints = k-j; 2703 total_counts = total_counts-temp_constraints+valid_constraints; 2704 #endif /* on missing GESVD */ 2705 } 2706 } 2707 /* update pointers information */ 2708 if (valid_constraints) { 2709 constraints_n[total_counts_cc] = valid_constraints; 2710 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2711 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2712 /* set change_of_basis flag */ 2713 if (boolforchange) { 2714 PetscBTSet(change_basis,total_counts_cc); 2715 } 2716 total_counts_cc++; 2717 } 2718 } 2719 /* free workspace */ 2720 if (!skip_lapack) { 2721 ierr = PetscFree(work);CHKERRQ(ierr); 2722 #if defined(PETSC_USE_COMPLEX) 2723 ierr = PetscFree(rwork);CHKERRQ(ierr); 2724 #endif 2725 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2726 #if defined(PETSC_MISSING_LAPACK_GESVD) 2727 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2728 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2729 #endif 2730 } 2731 for (k=0;k<nnsp_size;k++) { 2732 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2733 } 2734 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2735 /* free index sets of faces, edges and vertices */ 2736 for (i=0;i<n_ISForFaces;i++) { 2737 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2738 } 2739 if (n_ISForFaces) { 2740 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2741 } 2742 for (i=0;i<n_ISForEdges;i++) { 2743 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2744 } 2745 if (n_ISForEdges) { 2746 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2747 } 2748 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2749 } else { 2750 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2751 2752 total_counts = 0; 2753 n_vertices = 0; 2754 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2755 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2756 } 2757 max_constraints = 0; 2758 total_counts_cc = 0; 2759 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2760 total_counts += pcbddc->adaptive_constraints_n[i]; 2761 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2762 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2763 } 2764 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2765 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2766 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2767 constraints_data = pcbddc->adaptive_constraints_data; 2768 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2769 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2770 total_counts_cc = 0; 2771 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2772 if (pcbddc->adaptive_constraints_n[i]) { 2773 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2774 } 2775 } 2776 #if 0 2777 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2778 for (i=0;i<total_counts_cc;i++) { 2779 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2780 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2781 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2782 printf(" %d",constraints_idxs[j]); 2783 } 2784 printf("\n"); 2785 printf("number of cc: %d\n",constraints_n[i]); 2786 } 2787 for (i=0;i<n_vertices;i++) { 2788 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2789 } 2790 for (i=0;i<sub_schurs->n_subs;i++) { 2791 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2792 } 2793 #endif 2794 2795 max_size_of_constraint = 0; 2796 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2797 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2798 /* Change of basis */ 2799 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2800 if (pcbddc->use_change_of_basis) { 2801 for (i=0;i<sub_schurs->n_subs;i++) { 2802 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2803 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2804 } 2805 } 2806 } 2807 } 2808 pcbddc->local_primal_size = total_counts; 2809 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2810 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2811 2812 /* map constraints_idxs in boundary numbering */ 2813 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2814 if (i != constraints_idxs_ptr[total_counts_cc]) { 2815 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2816 } 2817 2818 /* Create constraint matrix */ 2819 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2820 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2821 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2822 2823 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2824 /* determine if a QR strategy is needed for change of basis */ 2825 qr_needed = PETSC_FALSE; 2826 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2827 total_primal_vertices=0; 2828 pcbddc->local_primal_size_cc = 0; 2829 for (i=0;i<total_counts_cc;i++) { 2830 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2831 if (size_of_constraint == 1) { 2832 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2833 pcbddc->local_primal_size_cc += 1; 2834 } else if (PetscBTLookup(change_basis,i)) { 2835 for (k=0;k<constraints_n[i];k++) { 2836 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2837 } 2838 pcbddc->local_primal_size_cc += constraints_n[i]; 2839 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2840 PetscBTSet(qr_needed_idx,i); 2841 qr_needed = PETSC_TRUE; 2842 } 2843 } else { 2844 pcbddc->local_primal_size_cc += 1; 2845 } 2846 } 2847 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2848 pcbddc->n_vertices = total_primal_vertices; 2849 /* permute indices in order to have a sorted set of vertices */ 2850 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2851 2852 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2853 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2854 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2855 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2856 2857 /* nonzero structure of constraint matrix */ 2858 /* and get reference dof for local constraints */ 2859 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2860 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2861 2862 j = total_primal_vertices; 2863 total_counts = total_primal_vertices; 2864 cum = total_primal_vertices; 2865 for (i=n_vertices;i<total_counts_cc;i++) { 2866 if (!PetscBTLookup(change_basis,i)) { 2867 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2868 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2869 cum++; 2870 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2871 for (k=0;k<constraints_n[i];k++) { 2872 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2873 nnz[j+k] = size_of_constraint; 2874 } 2875 j += constraints_n[i]; 2876 } 2877 } 2878 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2879 ierr = PetscFree(nnz);CHKERRQ(ierr); 2880 2881 /* set values in constraint matrix */ 2882 for (i=0;i<total_primal_vertices;i++) { 2883 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2884 } 2885 total_counts = total_primal_vertices; 2886 for (i=n_vertices;i<total_counts_cc;i++) { 2887 if (!PetscBTLookup(change_basis,i)) { 2888 PetscInt *cols; 2889 2890 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2891 cols = constraints_idxs+constraints_idxs_ptr[i]; 2892 for (k=0;k<constraints_n[i];k++) { 2893 PetscInt row = total_counts+k; 2894 PetscScalar *vals; 2895 2896 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2897 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2898 } 2899 total_counts += constraints_n[i]; 2900 } 2901 } 2902 /* assembling */ 2903 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 2906 /* 2907 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2908 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2909 */ 2910 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2911 if (pcbddc->use_change_of_basis) { 2912 /* dual and primal dofs on a single cc */ 2913 PetscInt dual_dofs,primal_dofs; 2914 /* working stuff for GEQRF */ 2915 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2916 PetscBLASInt lqr_work; 2917 /* working stuff for UNGQR */ 2918 PetscScalar *gqr_work,lgqr_work_t; 2919 PetscBLASInt lgqr_work; 2920 /* working stuff for TRTRS */ 2921 PetscScalar *trs_rhs; 2922 PetscBLASInt Blas_NRHS; 2923 /* pointers for values insertion into change of basis matrix */ 2924 PetscInt *start_rows,*start_cols; 2925 PetscScalar *start_vals; 2926 /* working stuff for values insertion */ 2927 PetscBT is_primal; 2928 PetscInt *aux_primal_numbering_B; 2929 /* matrix sizes */ 2930 PetscInt global_size,local_size; 2931 /* temporary change of basis */ 2932 Mat localChangeOfBasisMatrix; 2933 /* extra space for debugging */ 2934 PetscScalar *dbg_work; 2935 2936 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2937 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2938 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2939 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2940 /* nonzeros for local mat */ 2941 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2942 for (i=0;i<pcis->n;i++) nnz[i]=1; 2943 for (i=n_vertices;i<total_counts_cc;i++) { 2944 if (PetscBTLookup(change_basis,i)) { 2945 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2946 if (PetscBTLookup(qr_needed_idx,i)) { 2947 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2948 } else { 2949 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2950 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2951 } 2952 } 2953 } 2954 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2955 ierr = PetscFree(nnz);CHKERRQ(ierr); 2956 /* Set initial identity in the matrix */ 2957 for (i=0;i<pcis->n;i++) { 2958 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2959 } 2960 2961 if (pcbddc->dbg_flag) { 2962 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2963 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2964 } 2965 2966 2967 /* Now we loop on the constraints which need a change of basis */ 2968 /* 2969 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2970 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2971 2972 Basic blocks of change of basis matrix T computed by 2973 2974 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2975 2976 | 1 0 ... 0 s_1/S | 2977 | 0 1 ... 0 s_2/S | 2978 | ... | 2979 | 0 ... 1 s_{n-1}/S | 2980 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2981 2982 with S = \sum_{i=1}^n s_i^2 2983 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2984 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2985 2986 - QR decomposition of constraints otherwise 2987 */ 2988 if (qr_needed) { 2989 /* space to store Q */ 2990 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2991 /* first we issue queries for optimal work */ 2992 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2993 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2994 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2995 lqr_work = -1; 2996 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2997 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2998 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2999 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3000 lgqr_work = -1; 3001 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3002 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3003 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3004 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3005 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3006 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3007 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3008 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3009 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3010 /* array to store scaling factors for reflectors */ 3011 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3012 /* array to store rhs and solution of triangular solver */ 3013 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3014 /* allocating workspace for check */ 3015 if (pcbddc->dbg_flag) { 3016 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3017 } 3018 } 3019 /* array to store whether a node is primal or not */ 3020 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3021 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3022 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3023 if (i != total_primal_vertices) { 3024 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3025 } 3026 for (i=0;i<total_primal_vertices;i++) { 3027 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3028 } 3029 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3030 3031 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3032 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3033 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3034 if (PetscBTLookup(change_basis,total_counts)) { 3035 /* get constraint info */ 3036 primal_dofs = constraints_n[total_counts]; 3037 dual_dofs = size_of_constraint-primal_dofs; 3038 3039 if (pcbddc->dbg_flag) { 3040 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3041 } 3042 3043 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3044 3045 /* copy quadrature constraints for change of basis check */ 3046 if (pcbddc->dbg_flag) { 3047 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3048 } 3049 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3050 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3051 3052 /* compute QR decomposition of constraints */ 3053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3054 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3056 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3057 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3058 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3059 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3060 3061 /* explictly compute R^-T */ 3062 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3063 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3064 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3065 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3066 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3067 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3068 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3069 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3070 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3071 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3072 3073 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3074 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3075 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3076 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3078 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3079 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3080 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3081 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3082 3083 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3084 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3085 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3087 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3088 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3090 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3092 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3093 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3094 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3095 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3096 3097 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3098 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3099 /* insert cols for primal dofs */ 3100 for (j=0;j<primal_dofs;j++) { 3101 start_vals = &qr_basis[j*size_of_constraint]; 3102 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3103 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3104 } 3105 /* insert cols for dual dofs */ 3106 for (j=0,k=0;j<dual_dofs;k++) { 3107 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3108 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3109 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3110 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3111 j++; 3112 } 3113 } 3114 3115 /* check change of basis */ 3116 if (pcbddc->dbg_flag) { 3117 PetscInt ii,jj; 3118 PetscBool valid_qr=PETSC_TRUE; 3119 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3120 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3124 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3125 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3126 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3128 for (jj=0;jj<size_of_constraint;jj++) { 3129 for (ii=0;ii<primal_dofs;ii++) { 3130 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3131 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3132 } 3133 } 3134 if (!valid_qr) { 3135 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3136 for (jj=0;jj<size_of_constraint;jj++) { 3137 for (ii=0;ii<primal_dofs;ii++) { 3138 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3139 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3140 } 3141 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3142 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3143 } 3144 } 3145 } 3146 } else { 3147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3148 } 3149 } 3150 } else { /* simple transformation block */ 3151 PetscInt row,col; 3152 PetscScalar val,norm; 3153 3154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3155 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3156 for (j=0;j<size_of_constraint;j++) { 3157 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3158 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3159 if (!PetscBTLookup(is_primal,row_B)) { 3160 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3161 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3162 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3163 } else { 3164 for (k=0;k<size_of_constraint;k++) { 3165 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3166 if (row != col) { 3167 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3168 } else { 3169 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3170 } 3171 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3172 } 3173 } 3174 } 3175 if (pcbddc->dbg_flag) { 3176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3177 } 3178 } 3179 } else { 3180 if (pcbddc->dbg_flag) { 3181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3182 } 3183 } 3184 } 3185 3186 /* free workspace */ 3187 if (qr_needed) { 3188 if (pcbddc->dbg_flag) { 3189 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3190 } 3191 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3192 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3193 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3194 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3195 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3196 } 3197 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3198 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3199 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3200 3201 /* assembling of global change of variable */ 3202 { 3203 Mat tmat; 3204 PetscInt bs; 3205 3206 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3207 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3208 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3209 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3210 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3211 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3212 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3213 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3214 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3215 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3216 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3217 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3218 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3219 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3220 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3221 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3222 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3223 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3224 } 3225 /* check */ 3226 if (pcbddc->dbg_flag) { 3227 PetscReal error; 3228 Vec x,x_change; 3229 3230 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3231 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3232 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3233 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3234 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3235 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3236 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3237 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3238 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3239 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3240 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3241 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3242 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3243 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3244 ierr = VecDestroy(&x);CHKERRQ(ierr); 3245 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3246 } 3247 3248 /* adapt sub_schurs computed (if any) */ 3249 if (pcbddc->use_deluxe_scaling) { 3250 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3251 if (sub_schurs->S_Ej_all) { 3252 Mat S_new,tmat; 3253 ISLocalToGlobalMapping NtoSall; 3254 IS is_all_N,is_V,is_V_Sall; 3255 const PetscScalar *array; 3256 const PetscInt *idxs_V,*idxs_all; 3257 PetscInt i,n_V; 3258 3259 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3260 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3261 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3262 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3263 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3264 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3265 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3266 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3267 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3268 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3269 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3270 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3271 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3272 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3273 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3274 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3275 for (i=0;i<n_V;i++) { 3276 PetscScalar val; 3277 PetscInt idx; 3278 3279 idx = idxs_V[i]; 3280 val = array[idxs_all[idxs_V[i]]]; 3281 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3282 } 3283 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3285 sub_schurs->S_Ej_all = S_new; 3286 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3287 if (sub_schurs->sum_S_Ej_all) { 3288 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3289 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3290 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3291 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3292 sub_schurs->sum_S_Ej_all = S_new; 3293 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3294 } 3295 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3296 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3297 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3298 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3299 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3300 } 3301 } 3302 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3303 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3304 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3305 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3306 } 3307 3308 /* set up change of basis context */ 3309 if (pcbddc->ChangeOfBasisMatrix) { 3310 PCBDDCChange_ctx change_ctx; 3311 3312 if (!pcbddc->new_global_mat) { 3313 PetscInt global_size,local_size; 3314 3315 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3316 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3317 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3318 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3319 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3320 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3321 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3322 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3323 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3324 } else { 3325 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3326 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3327 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3328 } 3329 if (!pcbddc->user_ChangeOfBasisMatrix) { 3330 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3331 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3332 } else { 3333 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3334 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3335 } 3336 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3337 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3338 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3339 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3340 } 3341 3342 /* add pressure dof to set of primal nodes for numbering purposes */ 3343 if (pcbddc->benign_p0_lidx >= 0) { 3344 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3345 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3346 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3347 pcbddc->local_primal_size_cc++; 3348 pcbddc->local_primal_size++; 3349 } 3350 3351 /* check if a new primal space has been introduced (also take into account benign trick) */ 3352 pcbddc->new_primal_space_local = PETSC_TRUE; 3353 if (olocal_primal_size == pcbddc->local_primal_size) { 3354 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3355 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3356 if (!pcbddc->new_primal_space_local) { 3357 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3358 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3359 } 3360 } 3361 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3362 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3363 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3364 3365 /* flush dbg viewer */ 3366 if (pcbddc->dbg_flag) { 3367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3368 } 3369 3370 /* free workspace */ 3371 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3372 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3373 if (!pcbddc->adaptive_selection) { 3374 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3375 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3376 } else { 3377 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3378 pcbddc->adaptive_constraints_idxs_ptr, 3379 pcbddc->adaptive_constraints_data_ptr, 3380 pcbddc->adaptive_constraints_idxs, 3381 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3382 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3383 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3384 } 3385 PetscFunctionReturn(0); 3386 } 3387 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3390 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3391 { 3392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3393 PC_IS *pcis = (PC_IS*)pc->data; 3394 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3395 PetscInt ierr,i,vertex_size,N; 3396 PetscViewer viewer=pcbddc->dbg_viewer; 3397 3398 PetscFunctionBegin; 3399 /* Reset previously computed graph */ 3400 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3401 /* Init local Graph struct */ 3402 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3403 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3404 3405 /* Check validity of the csr graph passed in by the user */ 3406 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3407 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3408 } 3409 3410 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3411 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3412 PetscInt *xadj,*adjncy; 3413 PetscInt nvtxs; 3414 PetscBool flg_row=PETSC_FALSE; 3415 3416 if (pcbddc->use_local_adj) { 3417 3418 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3419 if (flg_row) { 3420 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3421 pcbddc->computed_rowadj = PETSC_TRUE; 3422 } 3423 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3424 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3425 IS is_dummy; 3426 ISLocalToGlobalMapping l2gmap_dummy; 3427 PetscInt j,sum; 3428 PetscInt *cxadj,*cadjncy; 3429 const PetscInt *idxs; 3430 PCBDDCGraph graph; 3431 PetscBT is_on_boundary; 3432 3433 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3434 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3435 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3436 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3437 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3438 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3439 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3440 if (flg_row) { 3441 graph->xadj = xadj; 3442 graph->adjncy = adjncy; 3443 } 3444 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3445 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3446 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3447 3448 if (pcbddc->dbg_flag) { 3449 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3450 for (i=0;i<graph->ncc;i++) { 3451 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3452 } 3453 } 3454 3455 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3456 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3457 for (i=0;i<pcis->n_B;i++) { 3458 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3459 } 3460 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3461 3462 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3463 sum = 0; 3464 for (i=0;i<graph->ncc;i++) { 3465 PetscInt sizecc = 0; 3466 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3467 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3468 sizecc++; 3469 } 3470 } 3471 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3472 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3473 cxadj[graph->queue[j]] = sizecc; 3474 } 3475 } 3476 sum += sizecc*sizecc; 3477 } 3478 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3479 sum = 0; 3480 for (i=0;i<pcis->n;i++) { 3481 PetscInt temp = cxadj[i]; 3482 cxadj[i] = sum; 3483 sum += temp; 3484 } 3485 cxadj[pcis->n] = sum; 3486 for (i=0;i<graph->ncc;i++) { 3487 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3488 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3489 PetscInt k,sizecc = 0; 3490 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3491 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3492 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3493 sizecc++; 3494 } 3495 } 3496 } 3497 } 3498 } 3499 if (sum) { 3500 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3501 } else { 3502 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3503 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3504 } 3505 graph->xadj = 0; 3506 graph->adjncy = 0; 3507 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3508 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3509 } 3510 } 3511 if (pcbddc->dbg_flag) { 3512 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3513 } 3514 3515 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3516 vertex_size = 1; 3517 if (pcbddc->user_provided_isfordofs) { 3518 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3519 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3520 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3521 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3522 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3523 } 3524 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3525 pcbddc->n_ISForDofs = 0; 3526 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3527 } 3528 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3529 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3530 } else { 3531 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3532 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3533 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3534 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3535 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3536 } 3537 } 3538 } 3539 3540 /* Setup of Graph */ 3541 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3542 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3543 } 3544 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3545 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3546 } 3547 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3548 3549 /* Graph's connected components analysis */ 3550 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3551 3552 /* print some info to stdout */ 3553 if (pcbddc->dbg_flag) { 3554 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3555 } 3556 3557 /* mark topography has done */ 3558 pcbddc->recompute_topography = PETSC_FALSE; 3559 PetscFunctionReturn(0); 3560 } 3561 3562 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "PCBDDCSubsetNumbering" 3565 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3566 { 3567 PetscSF sf; 3568 PetscLayout map; 3569 const PetscInt *idxs; 3570 PetscInt *leaf_data,*root_data,*gidxs; 3571 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3572 PetscInt n_n,nlocals,start,first_index; 3573 PetscMPIInt commsize; 3574 PetscBool first_found; 3575 PetscErrorCode ierr; 3576 3577 PetscFunctionBegin; 3578 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3579 if (subset_mult) { 3580 PetscCheckSameComm(subset,1,subset_mult,2); 3581 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3582 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3583 } 3584 /* create workspace layout for computing global indices of subset */ 3585 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3586 lbounds[0] = lbounds[1] = 0; 3587 for (i=0;i<n;i++) { 3588 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3589 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3590 } 3591 lbounds[0] = -lbounds[0]; 3592 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3593 gbounds[0] = -gbounds[0]; 3594 N = gbounds[1] - gbounds[0] + 1; 3595 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3596 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3597 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3598 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3599 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3600 3601 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3602 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3603 if (subset_mult) { 3604 const PetscInt* idxs_mult; 3605 3606 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3607 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3608 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3609 } else { 3610 for (i=0;i<n;i++) leaf_data[i] = 1; 3611 } 3612 /* local size of new subset */ 3613 n_n = 0; 3614 for (i=0;i<n;i++) n_n += leaf_data[i]; 3615 3616 /* global indexes in layout */ 3617 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3618 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3619 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3620 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3621 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3622 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3623 3624 /* reduce from leaves to roots */ 3625 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3626 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3627 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3628 3629 /* count indexes in local part of layout */ 3630 nlocals = 0; 3631 first_index = -1; 3632 first_found = PETSC_FALSE; 3633 for (i=0;i<Nl;i++) { 3634 if (!first_found && root_data[i]) { 3635 first_found = PETSC_TRUE; 3636 first_index = i; 3637 } 3638 nlocals += root_data[i]; 3639 } 3640 3641 /* cumulative of number of indexes and size of subset without holes */ 3642 #if defined(PETSC_HAVE_MPI_EXSCAN) 3643 start = 0; 3644 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3645 #else 3646 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3647 start = start-nlocals; 3648 #endif 3649 3650 if (N_n) { /* compute total size of new subset if requested */ 3651 *N_n = start + nlocals; 3652 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3653 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3654 } 3655 3656 /* adapt root data with cumulative */ 3657 if (first_found) { 3658 PetscInt old_index; 3659 3660 root_data[first_index] += start; 3661 old_index = first_index; 3662 for (i=first_index+1;i<Nl;i++) { 3663 if (root_data[i]) { 3664 root_data[i] += root_data[old_index]; 3665 old_index = i; 3666 } 3667 } 3668 } 3669 3670 /* from roots to leaves */ 3671 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3672 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3674 3675 /* create new IS with global indexes without holes */ 3676 if (subset_mult) { 3677 const PetscInt* idxs_mult; 3678 PetscInt cum; 3679 3680 cum = 0; 3681 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3682 for (i=0;i<n;i++) { 3683 PetscInt j; 3684 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3685 } 3686 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3687 } else { 3688 for (i=0;i<n;i++) { 3689 gidxs[i] = leaf_data[i]-1; 3690 } 3691 } 3692 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3693 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3694 PetscFunctionReturn(0); 3695 } 3696 3697 #undef __FUNCT__ 3698 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3699 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3700 { 3701 PetscInt i,j; 3702 PetscScalar *alphas; 3703 PetscErrorCode ierr; 3704 3705 PetscFunctionBegin; 3706 /* this implements stabilized Gram-Schmidt */ 3707 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3708 for (i=0;i<n;i++) { 3709 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3710 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3711 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3712 } 3713 ierr = PetscFree(alphas);CHKERRQ(ierr); 3714 PetscFunctionReturn(0); 3715 } 3716 3717 #undef __FUNCT__ 3718 #define __FUNCT__ "MatISGetSubassemblingPattern" 3719 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3720 { 3721 IS ranks_send_to; 3722 PetscInt n_neighs,*neighs,*n_shared,**shared; 3723 PetscMPIInt size,rank,color; 3724 PetscInt *xadj,*adjncy; 3725 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3726 PetscInt i,local_size,threshold=0; 3727 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3728 PetscSubcomm subcomm; 3729 PetscErrorCode ierr; 3730 3731 PetscFunctionBegin; 3732 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3733 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3734 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3735 3736 /* Get info on mapping */ 3737 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3738 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3739 3740 /* build local CSR graph of subdomains' connectivity */ 3741 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3742 xadj[0] = 0; 3743 xadj[1] = PetscMax(n_neighs-1,0); 3744 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3745 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3746 3747 if (threshold) { 3748 PetscInt xadj_count = 0; 3749 for (i=1;i<n_neighs;i++) { 3750 if (n_shared[i] > threshold) { 3751 adjncy[xadj_count] = neighs[i]; 3752 adjncy_wgt[xadj_count] = n_shared[i]; 3753 xadj_count++; 3754 } 3755 } 3756 xadj[1] = xadj_count; 3757 } else { 3758 if (xadj[1]) { 3759 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3760 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3761 } 3762 } 3763 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3764 if (use_square) { 3765 for (i=0;i<xadj[1];i++) { 3766 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3767 } 3768 } 3769 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3770 3771 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3772 3773 /* 3774 Restrict work on active processes only. 3775 */ 3776 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3777 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3778 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3779 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3780 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3781 if (color) { 3782 ierr = PetscFree(xadj);CHKERRQ(ierr); 3783 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3784 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3785 } else { 3786 Mat subdomain_adj; 3787 IS new_ranks,new_ranks_contig; 3788 MatPartitioning partitioner; 3789 PetscInt prank,rstart=0,rend=0; 3790 PetscInt *is_indices,*oldranks; 3791 PetscBool aggregate; 3792 3793 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3794 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3795 prank = rank; 3796 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3797 /* 3798 for (i=0;i<size;i++) { 3799 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3800 } 3801 */ 3802 for (i=0;i<xadj[1];i++) { 3803 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3804 } 3805 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3806 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3807 if (aggregate) { 3808 PetscInt lrows,row,ncols,*cols; 3809 PetscMPIInt nrank; 3810 PetscScalar *vals; 3811 3812 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3813 lrows = 0; 3814 if (nrank<redprocs) { 3815 lrows = size/redprocs; 3816 if (nrank<size%redprocs) lrows++; 3817 } 3818 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3819 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3820 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3821 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3822 row = nrank; 3823 ncols = xadj[1]-xadj[0]; 3824 cols = adjncy; 3825 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3826 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3827 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3828 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3829 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3830 ierr = PetscFree(xadj);CHKERRQ(ierr); 3831 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3832 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3833 ierr = PetscFree(vals);CHKERRQ(ierr); 3834 } else { 3835 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3836 } 3837 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3838 3839 /* Partition */ 3840 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3841 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3842 if (use_vwgt) { 3843 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3844 v_wgt[0] = local_size; 3845 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3846 } 3847 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3848 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3849 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3850 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3851 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3852 3853 /* renumber new_ranks to avoid "holes" in new set of processors */ 3854 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3855 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3856 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3857 if (!redprocs) { 3858 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3859 } else { 3860 PetscInt idxs[1]; 3861 PetscMPIInt tag; 3862 MPI_Request *reqs; 3863 3864 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3865 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3866 for (i=rstart;i<rend;i++) { 3867 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3868 } 3869 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3870 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3871 ierr = PetscFree(reqs);CHKERRQ(ierr); 3872 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3873 } 3874 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3875 /* clean up */ 3876 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3877 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3878 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3879 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3880 } 3881 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3882 3883 /* assemble parallel IS for sends */ 3884 i = 1; 3885 if (color) i=0; 3886 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3887 /* get back IS */ 3888 *is_sends = ranks_send_to; 3889 PetscFunctionReturn(0); 3890 } 3891 3892 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3893 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "MatISSubassemble" 3896 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3897 { 3898 Mat local_mat; 3899 IS is_sends_internal; 3900 PetscInt rows,cols,new_local_rows; 3901 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3902 PetscBool ismatis,isdense,newisdense,destroy_mat; 3903 ISLocalToGlobalMapping l2gmap; 3904 PetscInt* l2gmap_indices; 3905 const PetscInt* is_indices; 3906 MatType new_local_type; 3907 /* buffers */ 3908 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3909 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3910 PetscInt *recv_buffer_idxs_local; 3911 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3912 /* MPI */ 3913 MPI_Comm comm,comm_n; 3914 PetscSubcomm subcomm; 3915 PetscMPIInt n_sends,n_recvs,commsize; 3916 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3917 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3918 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3919 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3920 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3921 PetscErrorCode ierr; 3922 3923 PetscFunctionBegin; 3924 /* TODO: add missing checks */ 3925 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3926 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3927 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3928 PetscValidLogicalCollectiveInt(mat,nis,7); 3929 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3930 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3931 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3932 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3933 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3934 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3935 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3936 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3937 PetscInt mrows,mcols,mnrows,mncols; 3938 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3939 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3940 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3941 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3942 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3943 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3944 } 3945 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3946 PetscValidLogicalCollectiveInt(mat,bs,0); 3947 /* prepare IS for sending if not provided */ 3948 if (!is_sends) { 3949 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3950 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3951 } else { 3952 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3953 is_sends_internal = is_sends; 3954 } 3955 3956 /* get comm */ 3957 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3958 3959 /* compute number of sends */ 3960 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3961 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3962 3963 /* compute number of receives */ 3964 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3965 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3966 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3967 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3968 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3969 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3970 ierr = PetscFree(iflags);CHKERRQ(ierr); 3971 3972 /* restrict comm if requested */ 3973 subcomm = 0; 3974 destroy_mat = PETSC_FALSE; 3975 if (restrict_comm) { 3976 PetscMPIInt color,subcommsize; 3977 3978 color = 0; 3979 if (restrict_full) { 3980 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3981 } else { 3982 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3983 } 3984 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3985 subcommsize = commsize - subcommsize; 3986 /* check if reuse has been requested */ 3987 if (reuse == MAT_REUSE_MATRIX) { 3988 if (*mat_n) { 3989 PetscMPIInt subcommsize2; 3990 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3991 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3992 comm_n = PetscObjectComm((PetscObject)*mat_n); 3993 } else { 3994 comm_n = PETSC_COMM_SELF; 3995 } 3996 } else { /* MAT_INITIAL_MATRIX */ 3997 PetscMPIInt rank; 3998 3999 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4000 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4001 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4002 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4003 comm_n = PetscSubcommChild(subcomm); 4004 } 4005 /* flag to destroy *mat_n if not significative */ 4006 if (color) destroy_mat = PETSC_TRUE; 4007 } else { 4008 comm_n = comm; 4009 } 4010 4011 /* prepare send/receive buffers */ 4012 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4013 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4014 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4015 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4016 if (nis) { 4017 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4018 } 4019 4020 /* Get data from local matrices */ 4021 if (!isdense) { 4022 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4023 /* TODO: See below some guidelines on how to prepare the local buffers */ 4024 /* 4025 send_buffer_vals should contain the raw values of the local matrix 4026 send_buffer_idxs should contain: 4027 - MatType_PRIVATE type 4028 - PetscInt size_of_l2gmap 4029 - PetscInt global_row_indices[size_of_l2gmap] 4030 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4031 */ 4032 } else { 4033 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4034 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4035 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4036 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4037 send_buffer_idxs[1] = i; 4038 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4039 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4040 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4041 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4042 for (i=0;i<n_sends;i++) { 4043 ilengths_vals[is_indices[i]] = len*len; 4044 ilengths_idxs[is_indices[i]] = len+2; 4045 } 4046 } 4047 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4048 /* additional is (if any) */ 4049 if (nis) { 4050 PetscMPIInt psum; 4051 PetscInt j; 4052 for (j=0,psum=0;j<nis;j++) { 4053 PetscInt plen; 4054 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4055 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4056 psum += len+1; /* indices + lenght */ 4057 } 4058 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4059 for (j=0,psum=0;j<nis;j++) { 4060 PetscInt plen; 4061 const PetscInt *is_array_idxs; 4062 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4063 send_buffer_idxs_is[psum] = plen; 4064 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4065 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4066 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4067 psum += plen+1; /* indices + lenght */ 4068 } 4069 for (i=0;i<n_sends;i++) { 4070 ilengths_idxs_is[is_indices[i]] = psum; 4071 } 4072 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4073 } 4074 4075 buf_size_idxs = 0; 4076 buf_size_vals = 0; 4077 buf_size_idxs_is = 0; 4078 for (i=0;i<n_recvs;i++) { 4079 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4080 buf_size_vals += (PetscInt)olengths_vals[i]; 4081 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4082 } 4083 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4084 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4085 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4086 4087 /* get new tags for clean communications */ 4088 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4089 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4090 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4091 4092 /* allocate for requests */ 4093 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4094 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4095 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4096 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4097 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4098 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4099 4100 /* communications */ 4101 ptr_idxs = recv_buffer_idxs; 4102 ptr_vals = recv_buffer_vals; 4103 ptr_idxs_is = recv_buffer_idxs_is; 4104 for (i=0;i<n_recvs;i++) { 4105 source_dest = onodes[i]; 4106 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4107 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4108 ptr_idxs += olengths_idxs[i]; 4109 ptr_vals += olengths_vals[i]; 4110 if (nis) { 4111 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4112 ptr_idxs_is += olengths_idxs_is[i]; 4113 } 4114 } 4115 for (i=0;i<n_sends;i++) { 4116 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4117 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4118 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4119 if (nis) { 4120 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4121 } 4122 } 4123 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4124 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4125 4126 /* assemble new l2g map */ 4127 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4128 ptr_idxs = recv_buffer_idxs; 4129 new_local_rows = 0; 4130 for (i=0;i<n_recvs;i++) { 4131 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4132 ptr_idxs += olengths_idxs[i]; 4133 } 4134 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4135 ptr_idxs = recv_buffer_idxs; 4136 new_local_rows = 0; 4137 for (i=0;i<n_recvs;i++) { 4138 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4139 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4140 ptr_idxs += olengths_idxs[i]; 4141 } 4142 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4143 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4144 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4145 4146 /* infer new local matrix type from received local matrices type */ 4147 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4148 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4149 if (n_recvs) { 4150 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4151 ptr_idxs = recv_buffer_idxs; 4152 for (i=0;i<n_recvs;i++) { 4153 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4154 new_local_type_private = MATAIJ_PRIVATE; 4155 break; 4156 } 4157 ptr_idxs += olengths_idxs[i]; 4158 } 4159 switch (new_local_type_private) { 4160 case MATDENSE_PRIVATE: 4161 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4162 new_local_type = MATSEQAIJ; 4163 bs = 1; 4164 } else { /* if I receive only 1 dense matrix */ 4165 new_local_type = MATSEQDENSE; 4166 bs = 1; 4167 } 4168 break; 4169 case MATAIJ_PRIVATE: 4170 new_local_type = MATSEQAIJ; 4171 bs = 1; 4172 break; 4173 case MATBAIJ_PRIVATE: 4174 new_local_type = MATSEQBAIJ; 4175 break; 4176 case MATSBAIJ_PRIVATE: 4177 new_local_type = MATSEQSBAIJ; 4178 break; 4179 default: 4180 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4181 break; 4182 } 4183 } else { /* by default, new_local_type is seqdense */ 4184 new_local_type = MATSEQDENSE; 4185 bs = 1; 4186 } 4187 4188 /* create MATIS object if needed */ 4189 if (reuse == MAT_INITIAL_MATRIX) { 4190 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4191 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4192 } else { 4193 /* it also destroys the local matrices */ 4194 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4195 } 4196 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4197 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4198 4199 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4200 4201 /* Global to local map of received indices */ 4202 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4203 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4204 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4205 4206 /* restore attributes -> type of incoming data and its size */ 4207 buf_size_idxs = 0; 4208 for (i=0;i<n_recvs;i++) { 4209 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4210 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4211 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4212 } 4213 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4214 4215 /* set preallocation */ 4216 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4217 if (!newisdense) { 4218 PetscInt *new_local_nnz=0; 4219 4220 ptr_vals = recv_buffer_vals; 4221 ptr_idxs = recv_buffer_idxs_local; 4222 if (n_recvs) { 4223 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4224 } 4225 for (i=0;i<n_recvs;i++) { 4226 PetscInt j; 4227 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4228 for (j=0;j<*(ptr_idxs+1);j++) { 4229 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4230 } 4231 } else { 4232 /* TODO */ 4233 } 4234 ptr_idxs += olengths_idxs[i]; 4235 } 4236 if (new_local_nnz) { 4237 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4238 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4239 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4240 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4241 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4242 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4243 } else { 4244 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4245 } 4246 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4247 } else { 4248 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4249 } 4250 4251 /* set values */ 4252 ptr_vals = recv_buffer_vals; 4253 ptr_idxs = recv_buffer_idxs_local; 4254 for (i=0;i<n_recvs;i++) { 4255 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4256 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4257 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4258 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4259 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4260 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4261 } else { 4262 /* TODO */ 4263 } 4264 ptr_idxs += olengths_idxs[i]; 4265 ptr_vals += olengths_vals[i]; 4266 } 4267 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4268 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4269 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4270 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4271 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4272 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4273 4274 #if 0 4275 if (!restrict_comm) { /* check */ 4276 Vec lvec,rvec; 4277 PetscReal infty_error; 4278 4279 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4280 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4281 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4282 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4283 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4284 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4285 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4286 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4287 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4288 } 4289 #endif 4290 4291 /* assemble new additional is (if any) */ 4292 if (nis) { 4293 PetscInt **temp_idxs,*count_is,j,psum; 4294 4295 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4296 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4297 ptr_idxs = recv_buffer_idxs_is; 4298 psum = 0; 4299 for (i=0;i<n_recvs;i++) { 4300 for (j=0;j<nis;j++) { 4301 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4302 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4303 psum += plen; 4304 ptr_idxs += plen+1; /* shift pointer to received data */ 4305 } 4306 } 4307 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4308 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4309 for (i=1;i<nis;i++) { 4310 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4311 } 4312 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4313 ptr_idxs = recv_buffer_idxs_is; 4314 for (i=0;i<n_recvs;i++) { 4315 for (j=0;j<nis;j++) { 4316 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4317 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4318 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4319 ptr_idxs += plen+1; /* shift pointer to received data */ 4320 } 4321 } 4322 for (i=0;i<nis;i++) { 4323 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4324 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4325 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4326 } 4327 ierr = PetscFree(count_is);CHKERRQ(ierr); 4328 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4329 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4330 } 4331 /* free workspace */ 4332 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4333 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4334 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4335 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4336 if (isdense) { 4337 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4338 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4339 } else { 4340 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4341 } 4342 if (nis) { 4343 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4344 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4345 } 4346 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4347 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4348 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4349 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4350 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4351 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4352 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4353 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4354 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4355 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4356 ierr = PetscFree(onodes);CHKERRQ(ierr); 4357 if (nis) { 4358 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4359 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4360 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4361 } 4362 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4363 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4364 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4365 for (i=0;i<nis;i++) { 4366 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4367 } 4368 *mat_n = NULL; 4369 } 4370 PetscFunctionReturn(0); 4371 } 4372 4373 /* temporary hack into ksp private data structure */ 4374 #include <petsc/private/kspimpl.h> 4375 4376 #undef __FUNCT__ 4377 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4378 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4379 { 4380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4381 PC_IS *pcis = (PC_IS*)pc->data; 4382 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4383 MatNullSpace CoarseNullSpace=NULL; 4384 ISLocalToGlobalMapping coarse_islg; 4385 IS coarse_is,*isarray; 4386 PetscInt i,im_active=-1,active_procs=-1; 4387 PetscInt nis,nisdofs,nisneu; 4388 PC pc_temp; 4389 PCType coarse_pc_type; 4390 KSPType coarse_ksp_type; 4391 PetscBool multilevel_requested,multilevel_allowed; 4392 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4393 Mat t_coarse_mat_is; 4394 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4395 PetscMPIInt all_procs; 4396 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4397 PetscBool compute_vecs = PETSC_FALSE; 4398 PetscScalar *array; 4399 PetscErrorCode ierr; 4400 4401 PetscFunctionBegin; 4402 /* Assign global numbering to coarse dofs */ 4403 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4404 PetscInt ocoarse_size; 4405 compute_vecs = PETSC_TRUE; 4406 ocoarse_size = pcbddc->coarse_size; 4407 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4408 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4409 /* see if we can avoid some work */ 4410 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4411 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4412 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4413 PC pc; 4414 PetscBool isbddc; 4415 4416 /* temporary workaround since PCBDDC does not have a reset method so far */ 4417 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4418 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4419 if (isbddc) { 4420 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4421 } 4422 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4423 coarse_reuse = PETSC_FALSE; 4424 } else { /* we can safely reuse already computed coarse matrix */ 4425 coarse_reuse = PETSC_TRUE; 4426 } 4427 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4428 coarse_reuse = PETSC_FALSE; 4429 } 4430 /* reset any subassembling information */ 4431 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4432 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4433 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4434 coarse_reuse = PETSC_TRUE; 4435 } 4436 4437 /* count "active" (i.e. with positive local size) and "void" processes */ 4438 im_active = !!(pcis->n); 4439 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4440 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4441 void_procs = all_procs-active_procs; 4442 csin_type_simple = PETSC_TRUE; 4443 redist = PETSC_FALSE; 4444 if (pcbddc->current_level && void_procs) { 4445 csin_ml = PETSC_TRUE; 4446 ncoarse_ml = void_procs; 4447 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4448 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4449 csin_ds = PETSC_TRUE; 4450 ncoarse_ds = pcbddc->redistribute_coarse; 4451 redist = PETSC_TRUE; 4452 } else { 4453 csin_ds = PETSC_TRUE; 4454 ncoarse_ds = active_procs; 4455 redist = PETSC_TRUE; 4456 } 4457 } else { 4458 csin_ml = PETSC_FALSE; 4459 ncoarse_ml = all_procs; 4460 if (void_procs) { 4461 csin_ds = PETSC_TRUE; 4462 ncoarse_ds = void_procs; 4463 csin_type_simple = PETSC_FALSE; 4464 } else { 4465 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4466 csin_ds = PETSC_TRUE; 4467 ncoarse_ds = pcbddc->redistribute_coarse; 4468 redist = PETSC_TRUE; 4469 } else { 4470 csin_ds = PETSC_FALSE; 4471 ncoarse_ds = all_procs; 4472 } 4473 } 4474 } 4475 4476 /* 4477 test if we can go multilevel: three conditions must be satisfied: 4478 - we have not exceeded the number of levels requested 4479 - we can actually subassemble the active processes 4480 - we can find a suitable number of MPI processes where we can place the subassembled problem 4481 */ 4482 multilevel_allowed = PETSC_FALSE; 4483 multilevel_requested = PETSC_FALSE; 4484 if (pcbddc->current_level < pcbddc->max_levels) { 4485 multilevel_requested = PETSC_TRUE; 4486 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4487 multilevel_allowed = PETSC_FALSE; 4488 } else { 4489 multilevel_allowed = PETSC_TRUE; 4490 } 4491 } 4492 /* determine number of process partecipating to coarse solver */ 4493 if (multilevel_allowed) { 4494 ncoarse = ncoarse_ml; 4495 csin = csin_ml; 4496 redist = PETSC_FALSE; 4497 } else { 4498 ncoarse = ncoarse_ds; 4499 csin = csin_ds; 4500 } 4501 4502 /* creates temporary l2gmap and IS for coarse indexes */ 4503 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4504 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4505 4506 /* creates temporary MATIS object for coarse matrix */ 4507 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4508 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4509 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4510 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4511 #if 0 4512 { 4513 PetscViewer viewer; 4514 char filename[256]; 4515 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4516 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4517 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4518 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4519 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4520 } 4521 #endif 4522 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4523 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4524 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4525 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4526 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4527 4528 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4529 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4530 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4531 const PetscInt *idxs; 4532 ISLocalToGlobalMapping tmap; 4533 4534 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4535 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4536 /* allocate space for temporary storage */ 4537 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4538 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4539 /* allocate for IS array */ 4540 nisdofs = pcbddc->n_ISForDofsLocal; 4541 nisneu = !!pcbddc->NeumannBoundariesLocal; 4542 nis = nisdofs + nisneu; 4543 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4544 /* dofs splitting */ 4545 for (i=0;i<nisdofs;i++) { 4546 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4547 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4548 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4549 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4550 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4551 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4552 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4553 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4554 } 4555 /* neumann boundaries */ 4556 if (pcbddc->NeumannBoundariesLocal) { 4557 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4558 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4559 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4560 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4561 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4562 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4563 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4564 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4565 } 4566 /* free memory */ 4567 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4568 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4569 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4570 } else { 4571 nis = 0; 4572 nisdofs = 0; 4573 nisneu = 0; 4574 isarray = NULL; 4575 } 4576 /* destroy no longer needed map */ 4577 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4578 4579 /* restrict on coarse candidates (if needed) */ 4580 coarse_mat_is = NULL; 4581 if (csin) { 4582 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4583 if (redist) { 4584 PetscMPIInt rank; 4585 PetscInt spc,n_spc_p1,dest[1],destsize; 4586 4587 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4588 spc = active_procs/ncoarse; 4589 n_spc_p1 = active_procs%ncoarse; 4590 if (im_active) { 4591 destsize = 1; 4592 if (rank > n_spc_p1*(spc+1)-1) { 4593 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4594 } else { 4595 dest[0] = rank/(spc+1); 4596 } 4597 } else { 4598 destsize = 0; 4599 } 4600 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4601 } else if (csin_type_simple) { 4602 PetscMPIInt rank; 4603 PetscInt issize,isidx; 4604 4605 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4606 if (im_active) { 4607 issize = 1; 4608 isidx = (PetscInt)rank; 4609 } else { 4610 issize = 0; 4611 isidx = -1; 4612 } 4613 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4614 } else { /* get a suitable subassembling pattern from MATIS code */ 4615 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4616 } 4617 4618 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4619 if (!redist || ncoarse <= void_procs) { 4620 PetscInt ncoarse_cand,tissize,*nisindices; 4621 PetscInt *coarse_candidates; 4622 const PetscInt* tisindices; 4623 4624 /* get coarse candidates' ranks in pc communicator */ 4625 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4626 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4627 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4628 if (!coarse_candidates[i]) { 4629 coarse_candidates[ncoarse_cand++]=i; 4630 } 4631 } 4632 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4633 4634 4635 if (pcbddc->dbg_flag) { 4636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4638 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4639 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4640 for (i=0;i<ncoarse_cand;i++) { 4641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4642 } 4643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4645 } 4646 /* shift the pattern on coarse candidates */ 4647 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4648 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4649 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4650 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4651 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4652 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4653 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4654 } 4655 if (pcbddc->dbg_flag) { 4656 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4658 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4660 } 4661 } 4662 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4663 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4664 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4665 } else { /* this is the last level, so use just receiving processes in subcomm */ 4666 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4667 } 4668 } else { 4669 if (pcbddc->dbg_flag) { 4670 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4671 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4672 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4673 } 4674 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4675 coarse_mat_is = t_coarse_mat_is; 4676 } 4677 4678 /* create local to global scatters for coarse problem */ 4679 if (compute_vecs) { 4680 PetscInt lrows; 4681 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4682 if (coarse_mat_is) { 4683 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4684 } else { 4685 lrows = 0; 4686 } 4687 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4688 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4689 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4690 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4691 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4692 } 4693 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4694 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4695 4696 /* set defaults for coarse KSP and PC */ 4697 if (multilevel_allowed) { 4698 coarse_ksp_type = KSPRICHARDSON; 4699 coarse_pc_type = PCBDDC; 4700 } else { 4701 coarse_ksp_type = KSPPREONLY; 4702 coarse_pc_type = PCREDUNDANT; 4703 } 4704 4705 /* print some info if requested */ 4706 if (pcbddc->dbg_flag) { 4707 if (!multilevel_allowed) { 4708 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4709 if (multilevel_requested) { 4710 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4711 } else if (pcbddc->max_levels) { 4712 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4713 } 4714 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4715 } 4716 } 4717 4718 /* create the coarse KSP object only once with defaults */ 4719 if (coarse_mat_is) { 4720 MatReuse coarse_mat_reuse; 4721 PetscViewer dbg_viewer = NULL; 4722 if (pcbddc->dbg_flag) { 4723 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4724 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4725 } 4726 if (!pcbddc->coarse_ksp) { 4727 char prefix[256],str_level[16]; 4728 size_t len; 4729 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4730 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4731 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4732 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4733 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4734 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4735 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4736 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4737 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4738 /* prefix */ 4739 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4740 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4741 if (!pcbddc->current_level) { 4742 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4743 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4744 } else { 4745 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4746 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4747 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4748 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4749 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4750 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4751 } 4752 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4753 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4754 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4755 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4756 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4757 /* allow user customization */ 4758 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4759 } 4760 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4761 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4762 if (nisdofs) { 4763 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4764 for (i=0;i<nisdofs;i++) { 4765 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4766 } 4767 } 4768 if (nisneu) { 4769 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4770 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4771 } 4772 4773 /* get some info after set from options */ 4774 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4775 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4776 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4777 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4778 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4779 isbddc = PETSC_FALSE; 4780 } 4781 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4782 if (isredundant) { 4783 KSP inner_ksp; 4784 PC inner_pc; 4785 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4786 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4787 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4788 } 4789 4790 /* assemble coarse matrix */ 4791 if (coarse_reuse) { 4792 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4793 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4794 coarse_mat_reuse = MAT_REUSE_MATRIX; 4795 } else { 4796 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4797 } 4798 if (isbddc || isnn) { 4799 if (pcbddc->coarsening_ratio > 1) { 4800 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4801 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4802 if (pcbddc->dbg_flag) { 4803 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4804 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4805 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4806 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4807 } 4808 } 4809 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4810 } else { 4811 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4812 coarse_mat = coarse_mat_is; 4813 } 4814 } else { 4815 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4816 } 4817 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4818 4819 /* propagate symmetry info of coarse matrix */ 4820 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4821 if (pc->pmat->symmetric_set) { 4822 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4823 } 4824 if (pc->pmat->hermitian_set) { 4825 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4826 } 4827 if (pc->pmat->spd_set) { 4828 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4829 } 4830 /* set operators */ 4831 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4832 if (pcbddc->dbg_flag) { 4833 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4834 } 4835 } else { /* processes non partecipating to coarse solver (if any) */ 4836 coarse_mat = 0; 4837 } 4838 ierr = PetscFree(isarray);CHKERRQ(ierr); 4839 #if 0 4840 { 4841 PetscViewer viewer; 4842 char filename[256]; 4843 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4844 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4845 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4846 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4847 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4848 } 4849 #endif 4850 4851 /* Compute coarse null space (special handling by BDDC only) */ 4852 #if 0 4853 if (pcbddc->NullSpace) { 4854 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4855 } 4856 #endif 4857 4858 if (pcbddc->coarse_ksp) { 4859 Vec crhs,csol; 4860 PetscBool ispreonly; 4861 4862 if (CoarseNullSpace) { 4863 if (isbddc) { 4864 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4865 } else { 4866 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4867 } 4868 } 4869 /* setup coarse ksp */ 4870 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4871 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4872 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4873 /* hack */ 4874 if (!csol) { 4875 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4876 } 4877 if (!crhs) { 4878 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4879 } 4880 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4881 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4882 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4883 KSP check_ksp; 4884 KSPType check_ksp_type; 4885 PC check_pc; 4886 Vec check_vec,coarse_vec; 4887 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4888 PetscInt its; 4889 PetscBool compute_eigs; 4890 PetscReal *eigs_r,*eigs_c; 4891 PetscInt neigs; 4892 const char *prefix; 4893 4894 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4895 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4896 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4897 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4898 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4899 if (ispreonly) { 4900 check_ksp_type = KSPPREONLY; 4901 compute_eigs = PETSC_FALSE; 4902 } else { 4903 check_ksp_type = KSPGMRES; 4904 compute_eigs = PETSC_TRUE; 4905 } 4906 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4907 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4908 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4909 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4910 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4911 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4912 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4913 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4914 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4915 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4916 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4917 /* create random vec */ 4918 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4919 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4920 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4921 if (CoarseNullSpace) { 4922 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4923 } 4924 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4925 /* solve coarse problem */ 4926 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4927 if (CoarseNullSpace) { 4928 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4929 } 4930 /* set eigenvalue estimation if preonly has not been requested */ 4931 if (compute_eigs) { 4932 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4934 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4935 lambda_max = eigs_r[neigs-1]; 4936 lambda_min = eigs_r[0]; 4937 if (pcbddc->use_coarse_estimates) { 4938 if (lambda_max>lambda_min) { 4939 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4940 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4941 } 4942 } 4943 } 4944 4945 /* check coarse problem residual error */ 4946 if (pcbddc->dbg_flag) { 4947 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4948 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4949 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4950 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4951 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4952 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4953 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4954 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4955 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4956 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4957 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4958 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4959 if (compute_eigs) { 4960 PetscReal lambda_max_s,lambda_min_s; 4961 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4962 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4963 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4964 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4965 for (i=0;i<neigs;i++) { 4966 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4967 } 4968 } 4969 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4970 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4971 } 4972 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4973 if (compute_eigs) { 4974 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4975 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4976 } 4977 } 4978 } 4979 /* print additional info */ 4980 if (pcbddc->dbg_flag) { 4981 /* waits until all processes reaches this point */ 4982 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4983 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4985 } 4986 4987 /* free memory */ 4988 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4989 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4990 PetscFunctionReturn(0); 4991 } 4992 4993 #undef __FUNCT__ 4994 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4995 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4996 { 4997 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4998 PC_IS* pcis = (PC_IS*)pc->data; 4999 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5000 IS subset,subset_mult,subset_n; 5001 PetscInt local_size,coarse_size=0; 5002 PetscInt *local_primal_indices=NULL; 5003 const PetscInt *t_local_primal_indices; 5004 PetscErrorCode ierr; 5005 5006 PetscFunctionBegin; 5007 /* Compute global number of coarse dofs */ 5008 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5009 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5010 } 5011 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5012 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5013 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5014 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5015 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5016 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5017 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5018 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5019 if (local_size != pcbddc->local_primal_size) { 5020 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5021 } 5022 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5023 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5024 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5025 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5026 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5027 5028 /* check numbering */ 5029 if (pcbddc->dbg_flag) { 5030 PetscScalar coarsesum,*array,*array2; 5031 PetscInt i; 5032 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5033 5034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5037 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5038 /* counter */ 5039 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5040 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5041 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5042 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5043 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5044 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5045 5046 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5047 for (i=0;i<pcbddc->local_primal_size;i++) { 5048 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5049 } 5050 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5051 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5052 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5053 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5054 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5055 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5056 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5057 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5058 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5059 for (i=0;i<pcis->n;i++) { 5060 if (array[i] != 0.0 && array[i] != array2[i]) { 5061 PetscInt owned = (PetscInt)(array[i]); 5062 PetscInt neigh = (PetscInt)(array2[i]); 5063 set_error = PETSC_TRUE; 5064 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5065 } 5066 } 5067 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5068 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5070 for (i=0;i<pcis->n;i++) { 5071 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5072 } 5073 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5074 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5075 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5076 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5077 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5078 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5079 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5080 PetscInt *gidxs; 5081 5082 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5083 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5084 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5086 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5087 for (i=0;i<pcbddc->local_primal_size;i++) { 5088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5089 } 5090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5091 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5092 } 5093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5094 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5095 } 5096 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5097 /* get back data */ 5098 *coarse_size_n = coarse_size; 5099 *local_primal_indices_n = local_primal_indices; 5100 PetscFunctionReturn(0); 5101 } 5102 5103 #undef __FUNCT__ 5104 #define __FUNCT__ "PCBDDCGlobalToLocal" 5105 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5106 { 5107 IS localis_t; 5108 PetscInt i,lsize,*idxs,n; 5109 PetscScalar *vals; 5110 PetscErrorCode ierr; 5111 5112 PetscFunctionBegin; 5113 /* get indices in local ordering exploiting local to global map */ 5114 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5115 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5116 for (i=0;i<lsize;i++) vals[i] = 1.0; 5117 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5118 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5119 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5120 if (idxs) { /* multilevel guard */ 5121 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5122 } 5123 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5124 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5125 ierr = PetscFree(vals);CHKERRQ(ierr); 5126 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5127 /* now compute set in local ordering */ 5128 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5129 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5130 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5131 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5132 for (i=0,lsize=0;i<n;i++) { 5133 if (PetscRealPart(vals[i]) > 0.5) { 5134 lsize++; 5135 } 5136 } 5137 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5138 for (i=0,lsize=0;i<n;i++) { 5139 if (PetscRealPart(vals[i]) > 0.5) { 5140 idxs[lsize++] = i; 5141 } 5142 } 5143 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5144 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5145 *localis = localis_t; 5146 PetscFunctionReturn(0); 5147 } 5148 5149 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5150 #undef __FUNCT__ 5151 #define __FUNCT__ "PCBDDCMatMult_Private" 5152 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5153 { 5154 PCBDDCChange_ctx change_ctx; 5155 PetscErrorCode ierr; 5156 5157 PetscFunctionBegin; 5158 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5159 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5160 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5161 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5162 PetscFunctionReturn(0); 5163 } 5164 5165 #undef __FUNCT__ 5166 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5167 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5168 { 5169 PCBDDCChange_ctx change_ctx; 5170 PetscErrorCode ierr; 5171 5172 PetscFunctionBegin; 5173 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5174 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5175 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5176 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5177 PetscFunctionReturn(0); 5178 } 5179 5180 #undef __FUNCT__ 5181 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5182 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5183 { 5184 PC_IS *pcis=(PC_IS*)pc->data; 5185 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5186 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5187 Mat S_j; 5188 PetscInt *used_xadj,*used_adjncy; 5189 PetscBool free_used_adj; 5190 PetscErrorCode ierr; 5191 5192 PetscFunctionBegin; 5193 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5194 free_used_adj = PETSC_FALSE; 5195 if (pcbddc->sub_schurs_layers == -1) { 5196 used_xadj = NULL; 5197 used_adjncy = NULL; 5198 } else { 5199 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5200 used_xadj = pcbddc->mat_graph->xadj; 5201 used_adjncy = pcbddc->mat_graph->adjncy; 5202 } else if (pcbddc->computed_rowadj) { 5203 used_xadj = pcbddc->mat_graph->xadj; 5204 used_adjncy = pcbddc->mat_graph->adjncy; 5205 } else { 5206 PetscBool flg_row=PETSC_FALSE; 5207 const PetscInt *xadj,*adjncy; 5208 PetscInt nvtxs; 5209 5210 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5211 if (flg_row) { 5212 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5213 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5214 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5215 free_used_adj = PETSC_TRUE; 5216 } else { 5217 pcbddc->sub_schurs_layers = -1; 5218 used_xadj = NULL; 5219 used_adjncy = NULL; 5220 } 5221 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5222 } 5223 } 5224 5225 /* setup sub_schurs data */ 5226 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5227 if (!sub_schurs->use_mumps) { 5228 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5229 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5230 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5231 } else { 5232 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5233 PetscBool isseqaij; 5234 if (!pcbddc->use_vertices && reuse_solvers) { 5235 PetscInt n_vertices; 5236 5237 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5238 reuse_solvers = (PetscBool)!n_vertices; 5239 } 5240 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5241 if (!isseqaij) { 5242 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5243 if (matis->A == pcbddc->local_mat) { 5244 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5245 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5246 } else { 5247 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5248 } 5249 } 5250 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5251 } 5252 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5253 5254 /* free adjacency */ 5255 if (free_used_adj) { 5256 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5257 } 5258 PetscFunctionReturn(0); 5259 } 5260 5261 #undef __FUNCT__ 5262 #define __FUNCT__ "PCBDDCInitSubSchurs" 5263 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5264 { 5265 PC_IS *pcis=(PC_IS*)pc->data; 5266 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5267 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5268 PCBDDCGraph graph; 5269 PetscErrorCode ierr; 5270 5271 PetscFunctionBegin; 5272 /* attach interface graph for determining subsets */ 5273 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5274 IS verticesIS,verticescomm; 5275 PetscInt vsize,*idxs; 5276 5277 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5278 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5279 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5280 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5281 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5282 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5283 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5284 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5285 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5286 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5287 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5288 /* 5289 if (pcbddc->dbg_flag) { 5290 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5291 } 5292 */ 5293 } else { 5294 graph = pcbddc->mat_graph; 5295 } 5296 5297 /* sub_schurs init */ 5298 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5299 5300 /* free graph struct */ 5301 if (pcbddc->sub_schurs_rebuild) { 5302 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5303 } 5304 PetscFunctionReturn(0); 5305 } 5306 5307 #undef __FUNCT__ 5308 #define __FUNCT__ "PCBDDCCheckOperator" 5309 PetscErrorCode PCBDDCCheckOperator(PC pc) 5310 { 5311 PC_IS *pcis=(PC_IS*)pc->data; 5312 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5313 PetscErrorCode ierr; 5314 5315 PetscFunctionBegin; 5316 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5317 IS zerodiag = NULL; 5318 Mat S_j,B0=NULL,B0_B=NULL; 5319 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5320 PetscScalar norm,p0_check,*array,*array2; 5321 PetscInt i; 5322 5323 /* B0 and B0_B */ 5324 if (zerodiag) { 5325 IS dummy; 5326 PetscInt ii[2]; 5327 5328 ii[0] = 0; 5329 ii[1] = pcbddc->B0_ncol; 5330 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5331 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 5332 ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5333 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5334 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5335 } 5336 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5337 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5338 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5339 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5341 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5342 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5343 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5344 /* S_j */ 5345 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5346 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5347 5348 /* mimic vector in \widetilde{W}_\Gamma */ 5349 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5350 /* continuous in primal space */ 5351 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5352 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5353 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5354 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5355 if (zerodiag) { 5356 p0_check = array[pcbddc->local_primal_size-1]; 5357 } else { 5358 p0_check = 0; 5359 } 5360 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5361 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5362 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5363 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5364 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5365 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5366 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5367 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5368 5369 /* assemble rhs for coarse problem */ 5370 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5371 /* local with Schur */ 5372 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5373 if (zerodiag) { 5374 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5375 array[0] = p0_check; 5376 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5377 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5378 } 5379 /* sum on primal nodes the local contributions */ 5380 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5381 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5382 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5383 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5384 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5385 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5386 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5387 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5388 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5389 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5390 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5391 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5392 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5393 /* scale primal nodes (BDDC sums contibutions) */ 5394 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5395 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5396 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5397 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5398 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5399 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5400 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5401 /* global: \widetilde{B0}_B w_\Gamma */ 5402 if (zerodiag) { 5403 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5404 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5405 pcbddc->benign_p0 = array[0]; 5406 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5407 } else { 5408 pcbddc->benign_p0 = 0.; 5409 } 5410 /* BDDC */ 5411 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5412 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5413 5414 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5415 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5416 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5417 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5418 if (pcbddc->benign_p0_lidx >= 0) { 5419 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check)); 5420 } 5421 5422 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5423 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5424 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5425 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5426 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5427 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5428 } 5429 PetscFunctionReturn(0); 5430 } 5431