1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDetectDisconnectedComponents" 10 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 11 { 12 Mat B; 13 IS is_dummy,*cc_n; 14 ISLocalToGlobalMapping l2gmap_dummy; 15 PCBDDCGraph graph; 16 PetscInt i,n; 17 PetscInt *xadj,*adjncy; 18 PetscInt *xadj_filtered,*adjncy_filtered; 19 PetscBool flg_row,isseqaij; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 24 if (!isseqaij && filter) { 25 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 26 } else { 27 B = A; 28 } 29 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 30 31 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 32 if (filter) { 33 PetscScalar *data; 34 PetscInt j,cum; 35 36 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 37 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 38 cum = 0; 39 for (i=0;i<n;i++) { 40 PetscInt t; 41 42 for (j=xadj[i];j<xadj[i+1];j++) { 43 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 44 continue; 45 } 46 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 47 } 48 t = xadj_filtered[i]; 49 xadj_filtered[i] = cum; 50 cum += t; 51 } 52 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 53 } else { 54 xadj_filtered = NULL; 55 adjncy_filtered = NULL; 56 } 57 58 /* compute local connected components using PCBDDCGraph */ 59 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 60 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 61 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 62 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 63 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 64 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 65 if (xadj_filtered) { 66 graph->xadj = xadj_filtered; 67 graph->adjncy = adjncy_filtered; 68 } else { 69 graph->xadj = xadj; 70 graph->adjncy = adjncy; 71 } 72 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 73 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 74 75 /* partial clean up */ 76 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 77 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 78 if (!isseqaij && filter) { 79 ierr = MatDestroy(&B);CHKERRQ(ierr); 80 } 81 82 /* get back data */ 83 *ncc = graph->ncc; 84 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 85 for (i=0;i<graph->ncc;i++) { 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 87 } 88 *cc = cc_n; 89 90 /* clean up graph */ 91 graph->xadj = 0; 92 graph->adjncy = 0; 93 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignCheck" 99 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 100 { 101 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 102 PC_IS* pcis = (PC_IS*)(pc->data); 103 IS dirIS = NULL; 104 PetscInt i; 105 PetscErrorCode ierr; 106 107 PetscFunctionBegin; 108 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 109 if (zerodiag) { 110 Mat A; 111 Vec vec3_N; 112 PetscScalar *vals; 113 const PetscInt *idxs; 114 PetscInt nz; 115 116 /* p0 */ 117 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 118 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 119 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 120 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 121 for (i=0;i<nz;i++) vals[i] = 1.; 122 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 123 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 124 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 125 /* v_I */ 126 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 127 for (i=0;i<nz;i++) vals[i] = 0.; 128 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 130 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 131 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 132 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 134 if (dirIS) { 135 PetscInt n; 136 137 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 138 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 139 for (i=0;i<n;i++) vals[i] = 0.; 140 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 141 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 142 } 143 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 144 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 146 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 147 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 148 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 149 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 150 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 151 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 152 } 153 ierr = PetscFree(vals);CHKERRQ(ierr); 154 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 155 } 156 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 157 158 /* check PCBDDCBenignGetOrSetP0 */ 159 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 161 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 162 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 163 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 164 for (i=0;i<pcbddc->benign_n;i++) { 165 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 166 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 167 } 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 174 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 175 { 176 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 177 IS pressures,zerodiag,*zerodiag_subs; 178 PetscInt nz,n; 179 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 184 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 185 pcbddc->benign_original_mat = pcbddc->local_mat; 186 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 187 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 188 Checks if all the pressure dofs in each subdomain have a zero diagonal 189 If not, a change of basis on pressures is not needed 190 since the local Schur complements are SPD 191 */ 192 has_null_pressures = PETSC_TRUE; 193 have_null = PETSC_TRUE; 194 if (pcbddc->n_ISForDofsLocal) { 195 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 196 197 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 198 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 199 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 200 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 202 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 203 if (!sorted) { 204 ierr = ISSort(pressures);CHKERRQ(ierr); 205 } 206 } else { 207 pressures = NULL; 208 } 209 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 210 /* TODO: add check for shared dofs and raise error */ 211 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 212 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 213 if (!sorted) { 214 ierr = ISSort(zerodiag);CHKERRQ(ierr); 215 } 216 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 217 if (!nz) { 218 if (n) have_null = PETSC_FALSE; 219 has_null_pressures = PETSC_FALSE; 220 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 221 } 222 recompute_zerodiag = PETSC_FALSE; 223 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 224 zerodiag_subs = NULL; 225 pcbddc->benign_n = 0; 226 if (has_null_pressures) { 227 IS *subs; 228 PetscInt nsubs,i; 229 230 subs = pcbddc->local_subs; 231 nsubs = pcbddc->n_local_subs; 232 if (nsubs > 1) { 233 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 234 for (i=0;i<nsubs;i++) { 235 ISLocalToGlobalMapping l2g; 236 IS t_zerodiag_subs; 237 PetscInt nl; 238 239 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 240 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 241 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 242 if (nl) { 243 PetscBool valid = PETSC_TRUE; 244 245 if (pressures) { 246 IS t_pressure_subs; 247 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 248 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 249 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 250 } 251 if (valid) { 252 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 253 pcbddc->benign_n++; 254 } else { 255 recompute_zerodiag = PETSC_TRUE; 256 } 257 } 258 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 259 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 260 } 261 } else { /* there's just one subdomain (or zero if they have not been detected */ 262 PetscBool valid = PETSC_TRUE; 263 if (pressures) { 264 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 265 } 266 if (valid) { 267 pcbddc->benign_n = 1; 268 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 269 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 270 zerodiag_subs[0] = zerodiag; 271 } 272 } 273 } 274 275 if (!pcbddc->benign_n) { 276 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 277 recompute_zerodiag = PETSC_FALSE; 278 has_null_pressures = PETSC_FALSE; 279 have_null = PETSC_FALSE; 280 } 281 282 /* final check for null pressures */ 283 if (zerodiag && pressures) { 284 PetscInt nz,np; 285 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 286 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 287 if (nz != np) have_null = PETSC_FALSE; 288 } 289 290 if (recompute_zerodiag) { 291 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 292 if (pcbddc->benign_n == 1) { 293 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 294 zerodiag = zerodiag_subs[0]; 295 } else { 296 PetscInt i,nzn,*new_idxs; 297 298 nzn = 0; 299 for (i=0;i<pcbddc->benign_n;i++) { 300 PetscInt ns; 301 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 302 nzn += ns; 303 } 304 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 305 nzn = 0; 306 for (i=0;i<pcbddc->benign_n;i++) { 307 PetscInt ns,*idxs; 308 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 309 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 310 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 311 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 312 nzn += ns; 313 } 314 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 315 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 316 } 317 have_null = PETSC_FALSE; 318 } 319 320 if (has_null_pressures) { 321 IS zerodiagc; 322 const PetscInt *idxs,*idxsc; 323 PetscInt i,s,*nnz; 324 325 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 326 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 327 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 328 /* local change of basis for pressures */ 329 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 330 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 331 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 332 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 333 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 334 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 335 for (i=0;i<pcbddc->benign_n;i++) { 336 PetscInt nzs,j; 337 338 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 339 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 340 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 341 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 342 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 343 } 344 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 345 ierr = PetscFree(nnz);CHKERRQ(ierr); 346 /* set identity on velocities */ 347 for (i=0;i<n-nz;i++) { 348 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 349 } 350 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 351 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 352 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 353 /* set change on pressures */ 354 for (s=0;s<pcbddc->benign_n;s++) { 355 PetscScalar *array; 356 PetscInt nzs; 357 358 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 359 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 360 for (i=0;i<nzs-1;i++) { 361 PetscScalar vals[2]; 362 PetscInt cols[2]; 363 364 cols[0] = idxs[i]; 365 cols[1] = idxs[nzs-1]; 366 vals[0] = 1.; 367 vals[1] = 1.; 368 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 369 } 370 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 371 for (i=0;i<nzs-1;i++) array[i] = -1.; 372 array[nzs-1] = 1.; 373 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 374 /* store local idxs for p0 */ 375 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 376 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 377 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 378 ierr = PetscFree(array);CHKERRQ(ierr); 379 } 380 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 381 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 382 /* TODO: need optimization? */ 383 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 384 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 385 /* store global idxs for p0 */ 386 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 387 } 388 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 389 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 390 391 /* determines if the coarse solver will be singular or not */ 392 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 393 *zerodiaglocal = zerodiag; 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 399 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 400 { 401 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 if (!pcbddc->benign_sf) { 406 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 407 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 408 } 409 if (get) { /* use SF to get values */ 410 PetscScalar *array; 411 412 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 413 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 414 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 416 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 417 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 418 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 419 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 420 } 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 426 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 427 { 428 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 429 PetscErrorCode ierr; 430 431 PetscFunctionBegin; 432 /* TODO: add error checking 433 - avoid nested pop (or push) calls. 434 - cannot push before pop. 435 - cannot call this if pcbddc->local_mat is NULL 436 */ 437 if (!pcbddc->benign_n) { 438 PetscFunctionReturn(0); 439 } 440 if (pop) { 441 IS is_p0; 442 MatReuse reuse; 443 444 /* extract B_0 */ 445 reuse = MAT_INITIAL_MATRIX; 446 if (pcbddc->benign_B0) { 447 reuse = MAT_REUSE_MATRIX; 448 } 449 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 450 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 451 /* remove rows and cols from local problem */ 452 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 453 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 454 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 455 } else { /* push */ 456 PetscInt i; 457 458 for (i=0;i<pcbddc->benign_n;i++) { 459 PetscScalar *B0_vals; 460 PetscInt *B0_cols,B0_ncol; 461 462 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 463 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 464 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 465 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 466 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 467 } 468 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "PCBDDCAdaptiveSelection" 476 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 477 { 478 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 479 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 480 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 481 PetscBLASInt *B_iwork,*B_ifail; 482 PetscScalar *work,lwork; 483 PetscScalar *St,*S,*eigv; 484 PetscScalar *Sarray,*Starray; 485 PetscReal *eigs,thresh; 486 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 487 PetscBool allocated_S_St; 488 #if defined(PETSC_USE_COMPLEX) 489 PetscReal *rwork; 490 #endif 491 PetscErrorCode ierr; 492 493 PetscFunctionBegin; 494 if (!sub_schurs->use_mumps) { 495 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 496 } 497 498 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 499 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 500 } 501 502 if (pcbddc->dbg_flag) { 503 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 505 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 506 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 507 } 508 509 if (pcbddc->dbg_flag) { 510 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 511 } 512 513 /* max size of subsets */ 514 mss = 0; 515 for (i=0;i<sub_schurs->n_subs;i++) { 516 PetscInt subset_size; 517 518 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 519 mss = PetscMax(mss,subset_size); 520 } 521 522 /* min/max and threshold */ 523 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 524 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 525 nmax = PetscMax(nmin,nmax); 526 allocated_S_St = PETSC_FALSE; 527 if (nmin) { 528 allocated_S_St = PETSC_TRUE; 529 } 530 531 /* allocate lapack workspace */ 532 cum = cum2 = 0; 533 maxneigs = 0; 534 for (i=0;i<sub_schurs->n_subs;i++) { 535 PetscInt n,subset_size; 536 537 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 538 n = PetscMin(subset_size,nmax); 539 cum += subset_size; 540 cum2 += subset_size*n; 541 maxneigs = PetscMax(maxneigs,n); 542 } 543 if (mss) { 544 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 545 PetscBLASInt B_itype = 1; 546 PetscBLASInt B_N = mss; 547 PetscReal zero = 0.0; 548 PetscReal eps = 0.0; /* dlamch? */ 549 550 B_lwork = -1; 551 S = NULL; 552 St = NULL; 553 eigs = NULL; 554 eigv = NULL; 555 B_iwork = NULL; 556 B_ifail = NULL; 557 #if defined(PETSC_USE_COMPLEX) 558 rwork = NULL; 559 #endif 560 thresh = 1.0; 561 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 562 #if defined(PETSC_USE_COMPLEX) 563 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 564 #else 565 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 566 #endif 567 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 569 } else { 570 /* TODO */ 571 } 572 } else { 573 lwork = 0; 574 } 575 576 nv = 0; 577 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 578 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 579 } 580 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 581 if (allocated_S_St) { 582 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 583 } 584 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 585 #if defined(PETSC_USE_COMPLEX) 586 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 587 #endif 588 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 589 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 590 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 591 nv+cum,&pcbddc->adaptive_constraints_idxs, 592 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 593 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 594 595 maxneigs = 0; 596 cum = cum2 = cumarray = 0; 597 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 598 pcbddc->adaptive_constraints_data_ptr[0] = 0; 599 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 600 const PetscInt *idxs; 601 602 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 603 for (cum=0;cum<nv;cum++) { 604 pcbddc->adaptive_constraints_n[cum] = 1; 605 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 606 pcbddc->adaptive_constraints_data[cum] = 1.0; 607 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 608 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 609 } 610 cum2 = cum; 611 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 612 } 613 614 if (mss) { /* multilevel */ 615 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 616 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 617 } 618 619 for (i=0;i<sub_schurs->n_subs;i++) { 620 621 const PetscInt *idxs; 622 PetscReal infty = PETSC_MAX_REAL; 623 PetscInt j,subset_size,eigs_start = 0; 624 PetscBLASInt B_N; 625 PetscBool same_data = PETSC_FALSE; 626 627 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 628 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 629 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 630 if (sub_schurs->is_hermitian) { 631 PetscInt j,k; 632 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 633 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 634 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 635 } 636 for (j=0;j<subset_size;j++) { 637 for (k=j;k<subset_size;k++) { 638 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 639 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 640 } 641 } 642 } else { 643 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 644 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 645 } 646 } else { 647 S = Sarray + cumarray; 648 St = Starray + cumarray; 649 } 650 651 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 652 /* see if we can save some work */ 653 if (sub_schurs->n_subs == 1) { 654 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 655 } 656 657 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 658 B_neigs = 0; 659 } else { 660 /* Threshold: this is an heuristic for edges */ 661 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 662 663 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 664 PetscBLASInt B_itype = 1; 665 PetscBLASInt B_IL, B_IU; 666 PetscReal eps = -1.0; /* dlamch? */ 667 PetscInt nmin_s; 668 669 if (pcbddc->dbg_flag) { 670 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 671 } 672 673 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 674 if (thresh > 1.+PETSC_SMALL) { 675 676 /* ask for eigenvalues larger than thresh */ 677 #if defined(PETSC_USE_COMPLEX) 678 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 679 #else 680 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 681 #endif 682 } else { 683 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 684 B_IL = 1; 685 #if defined(PETSC_USE_COMPLEX) 686 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 687 #else 688 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 689 #endif 690 } 691 ierr = PetscFPTrapPop();CHKERRQ(ierr); 692 if (B_ierr) { 693 if (B_ierr < 0 ) { 694 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 695 } else if (B_ierr <= B_N) { 696 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 697 } else { 698 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 699 } 700 } 701 702 if (B_neigs > nmax) { 703 if (pcbddc->dbg_flag) { 704 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 705 } 706 eigs_start = B_neigs -nmax; 707 B_neigs = nmax; 708 } 709 710 nmin_s = PetscMin(nmin,B_N); 711 if (B_neigs < nmin_s) { 712 PetscBLASInt B_neigs2; 713 714 B_IU = B_N - B_neigs; 715 B_IL = B_N - nmin_s + 1; 716 if (pcbddc->dbg_flag) { 717 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 718 } 719 if (sub_schurs->is_hermitian) { 720 PetscInt j; 721 for (j=0;j<subset_size;j++) { 722 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 723 } 724 for (j=0;j<subset_size;j++) { 725 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 726 } 727 } else { 728 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 729 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 730 } 731 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 732 #if defined(PETSC_USE_COMPLEX) 733 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 734 #else 735 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 736 #endif 737 ierr = PetscFPTrapPop();CHKERRQ(ierr); 738 B_neigs += B_neigs2; 739 } 740 if (B_ierr) { 741 if (B_ierr < 0 ) { 742 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 743 } else if (B_ierr <= B_N) { 744 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 745 } else { 746 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 747 } 748 } 749 if (pcbddc->dbg_flag) { 750 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 751 for (j=0;j<B_neigs;j++) { 752 if (eigs[j] == 0.0) { 753 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 754 } else { 755 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 756 } 757 } 758 } 759 } else { 760 /* TODO */ 761 } 762 } 763 maxneigs = PetscMax(B_neigs,maxneigs); 764 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 765 if (B_neigs) { 766 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 767 768 if (pcbddc->dbg_flag > 1) { 769 PetscInt ii; 770 for (ii=0;ii<B_neigs;ii++) { 771 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 772 for (j=0;j<B_N;j++) { 773 #if defined(PETSC_USE_COMPLEX) 774 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 775 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 777 #else 778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 779 #endif 780 } 781 } 782 } 783 #if 0 784 for (j=0;j<B_neigs;j++) { 785 PetscBLASInt Blas_N,Blas_one = 1.0; 786 PetscScalar norm; 787 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 788 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 789 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 790 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 791 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 792 } else { 793 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 794 } 795 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 796 } 797 #endif 798 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 799 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 800 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 801 cum++; 802 } 803 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 804 /* shift for next computation */ 805 cumarray += subset_size*subset_size; 806 } 807 if (pcbddc->dbg_flag) { 808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 809 } 810 811 if (mss) { 812 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 813 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 814 /* destroy matrices (junk) */ 815 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 816 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 817 } 818 if (allocated_S_St) { 819 ierr = PetscFree2(S,St);CHKERRQ(ierr); 820 } 821 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 822 #if defined(PETSC_USE_COMPLEX) 823 ierr = PetscFree(rwork);CHKERRQ(ierr); 824 #endif 825 if (pcbddc->dbg_flag) { 826 PetscInt maxneigs_r; 827 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 828 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 829 } 830 PetscFunctionReturn(0); 831 } 832 833 #undef __FUNCT__ 834 #define __FUNCT__ "PCBDDCSetUpSolvers" 835 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 836 { 837 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 838 PetscScalar *coarse_submat_vals; 839 PetscErrorCode ierr; 840 841 PetscFunctionBegin; 842 /* Setup local scatters R_to_B and (optionally) R_to_D */ 843 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 844 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 845 846 /* Setup local neumann solver ksp_R */ 847 /* PCBDDCSetUpLocalScatters should be called first! */ 848 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 849 850 /* Change global null space passed in by the user if change of basis has been requested */ 851 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 852 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 853 } 854 855 /* 856 Setup local correction and local part of coarse basis. 857 Gives back the dense local part of the coarse matrix in column major ordering 858 */ 859 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 860 861 /* Compute total number of coarse nodes and setup coarse solver */ 862 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 863 864 /* free */ 865 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "PCBDDCResetCustomization" 871 PetscErrorCode PCBDDCResetCustomization(PC pc) 872 { 873 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 874 PetscErrorCode ierr; 875 876 PetscFunctionBegin; 877 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 878 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 879 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 880 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 881 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 882 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 883 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 884 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 885 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 886 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 887 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 888 PetscFunctionReturn(0); 889 } 890 891 #undef __FUNCT__ 892 #define __FUNCT__ "PCBDDCResetTopography" 893 PetscErrorCode PCBDDCResetTopography(PC pc) 894 { 895 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 896 PetscInt i; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 901 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 902 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 903 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 904 for (i=0;i<pcbddc->n_local_subs;i++) { 905 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 906 } 907 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 908 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "PCBDDCResetSolvers" 914 PetscErrorCode PCBDDCResetSolvers(PC pc) 915 { 916 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 917 PetscScalar *array; 918 PetscErrorCode ierr; 919 920 PetscFunctionBegin; 921 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 922 if (pcbddc->coarse_phi_B) { 923 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 924 ierr = PetscFree(array);CHKERRQ(ierr); 925 } 926 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 927 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 928 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 929 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 930 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 931 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 932 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 933 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 934 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 935 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 936 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 937 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 938 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 939 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 940 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 941 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 942 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 943 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 944 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 945 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 946 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 947 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 948 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 949 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 950 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 951 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 952 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 953 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 954 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 955 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 961 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 962 { 963 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 964 PC_IS *pcis = (PC_IS*)pc->data; 965 VecType impVecType; 966 PetscInt n_constraints,n_R,old_size; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 if (!pcbddc->ConstraintMatrix) { 971 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 972 } 973 /* get sizes */ 974 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 975 n_R = pcis->n - pcbddc->n_vertices; 976 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 977 /* local work vectors (try to avoid unneeded work)*/ 978 /* R nodes */ 979 old_size = -1; 980 if (pcbddc->vec1_R) { 981 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 982 } 983 if (n_R != old_size) { 984 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 985 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 986 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 987 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 988 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 989 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 990 } 991 /* local primal dofs */ 992 old_size = -1; 993 if (pcbddc->vec1_P) { 994 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 995 } 996 if (pcbddc->local_primal_size != old_size) { 997 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 998 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 999 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1000 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1001 } 1002 /* local explicit constraints */ 1003 old_size = -1; 1004 if (pcbddc->vec1_C) { 1005 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1006 } 1007 if (n_constraints && n_constraints != old_size) { 1008 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1009 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1010 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1011 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "PCBDDCSetUpCorrection" 1018 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1019 { 1020 PetscErrorCode ierr; 1021 /* pointers to pcis and pcbddc */ 1022 PC_IS* pcis = (PC_IS*)pc->data; 1023 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1024 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1025 /* submatrices of local problem */ 1026 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1027 /* submatrices of benign trick */ 1028 Mat B0_V = NULL; 1029 /* submatrices of local coarse problem */ 1030 Mat S_VV,S_CV,S_VC,S_CC; 1031 /* working matrices */ 1032 Mat C_CR; 1033 /* additional working stuff */ 1034 PC pc_R; 1035 Mat F; 1036 PetscBool isLU,isCHOL,isILU; 1037 1038 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1039 PetscScalar *work; 1040 PetscInt *idx_V_B; 1041 PetscInt n,n_vertices,n_constraints,*p0_lidx_I; 1042 PetscInt i,n_R,n_D,n_B; 1043 PetscBool unsymmetric_check; 1044 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 1045 MatType impMatType; 1046 /* some shortcuts to scalars */ 1047 PetscScalar one=1.0,m_one=-1.0; 1048 1049 PetscFunctionBegin; 1050 n_vertices = pcbddc->n_vertices; 1051 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1052 /* Set Non-overlapping dimensions */ 1053 n_B = pcis->n_B; 1054 n_D = pcis->n - n_B; 1055 n_R = pcis->n - n_vertices; 1056 1057 /* Set types for local objects needed by BDDC precondtioner */ 1058 impMatType = MATSEQDENSE; 1059 1060 /* vertices in boundary numbering */ 1061 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1062 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1063 if (i != n_vertices) { 1064 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1065 } 1066 1067 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1068 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1069 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1070 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1072 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1073 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1074 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1075 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1076 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1077 1078 unsymmetric_check = PETSC_FALSE; 1079 /* allocate workspace */ 1080 n = 0; 1081 if (n_constraints) { 1082 n += n_R*n_constraints; 1083 } 1084 if (n_vertices) { 1085 n = PetscMax(2*n_R*n_vertices,n); 1086 n = PetscMax((n_R+n_B)*n_vertices,n); 1087 } 1088 if (!pcbddc->symmetric_primal) { 1089 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 1090 unsymmetric_check = PETSC_TRUE; 1091 } 1092 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1093 1094 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1095 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1096 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1098 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1099 if (isLU || isILU || isCHOL) { 1100 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1101 } else if (sub_schurs->reuse_mumps) { 1102 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1103 MatFactorType type; 1104 1105 F = reuse_mumps->F; 1106 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1107 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1108 } else { 1109 F = NULL; 1110 } 1111 1112 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1113 if (n_constraints) { 1114 Mat M1,M2,M3; 1115 Mat auxmat; 1116 IS is_aux; 1117 PetscScalar *array,*array2; 1118 1119 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1120 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1121 1122 /* Extract constraints on R nodes: C_{CR} */ 1123 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1124 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1125 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1126 1127 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1128 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1129 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1130 for (i=0;i<n_constraints;i++) { 1131 const PetscScalar *row_cmat_values; 1132 const PetscInt *row_cmat_indices; 1133 PetscInt size_of_constraint,j; 1134 1135 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1136 for (j=0;j<size_of_constraint;j++) { 1137 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 1138 } 1139 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1140 } 1141 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1142 if (F) { 1143 Mat B; 1144 1145 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1146 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1147 ierr = MatDestroy(&B);CHKERRQ(ierr); 1148 } else { 1149 PetscScalar *marr; 1150 1151 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1152 for (i=0;i<n_constraints;i++) { 1153 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1154 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 1155 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1156 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1158 } 1159 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1160 } 1161 if (!pcbddc->switch_static) { 1162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1163 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1164 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1165 for (i=0;i<n_constraints;i++) { 1166 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 1167 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1168 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1171 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1172 } 1173 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1174 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1175 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1176 } else { 1177 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1178 pcbddc->local_auxmat2 = local_auxmat2_R; 1179 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1180 } 1181 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1182 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1183 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1184 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1185 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1186 if (isCHOL) { 1187 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1188 } else { 1189 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1190 } 1191 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1192 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1193 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1194 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1195 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1196 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1197 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1198 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1199 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1200 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1201 } 1202 /* Get submatrices from subdomain matrix */ 1203 if (pcbddc->benign_n) { 1204 IS dummy; 1205 Mat B0_R; 1206 PetscReal norm; 1207 1208 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1209 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1210 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1211 if (norm > PETSC_SMALL) { 1212 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1213 } 1214 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1215 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1216 } 1217 1218 if (n_vertices) { 1219 IS is_aux; 1220 1221 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1222 IS tis; 1223 1224 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1225 ierr = ISSort(tis);CHKERRQ(ierr); 1226 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1227 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1228 } else { 1229 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1230 } 1231 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1232 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1233 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1234 if (pcbddc->benign_n) { 1235 IS dummy; 1236 1237 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1238 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1239 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1240 } 1241 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1242 } 1243 1244 /* Matrix of coarse basis functions (local) */ 1245 if (pcbddc->coarse_phi_B) { 1246 PetscInt on_B,on_primal,on_D=n_D; 1247 if (pcbddc->coarse_phi_D) { 1248 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1249 } 1250 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1251 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1252 PetscScalar *marray; 1253 1254 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1255 ierr = PetscFree(marray);CHKERRQ(ierr); 1256 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1257 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1258 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1259 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1260 } 1261 } 1262 1263 if (!pcbddc->coarse_phi_B) { 1264 PetscScalar *marray; 1265 1266 n = n_B*pcbddc->local_primal_size; 1267 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1268 n += n_D*pcbddc->local_primal_size; 1269 } 1270 if (!pcbddc->symmetric_primal) { 1271 n *= 2; 1272 } 1273 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1274 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1275 n = n_B*pcbddc->local_primal_size; 1276 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1277 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1278 n += n_D*pcbddc->local_primal_size; 1279 } 1280 if (!pcbddc->symmetric_primal) { 1281 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1282 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1283 n = n_B*pcbddc->local_primal_size; 1284 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1285 } 1286 } else { 1287 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1288 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1289 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1290 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1291 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1292 } 1293 } 1294 } 1295 1296 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1297 p0_lidx_I = NULL; 1298 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1299 const PetscInt *idxs; 1300 1301 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1302 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1303 for (i=0;i<pcbddc->benign_n;i++) { 1304 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1305 } 1306 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1307 } 1308 1309 /* vertices */ 1310 if (n_vertices) { 1311 1312 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1313 1314 if (n_R) { 1315 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1316 PetscBLASInt B_N,B_one = 1; 1317 PetscScalar *x,*y; 1318 PetscBool isseqaij; 1319 1320 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1321 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1322 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1323 if (F) { /* TODO could be optimized for symmetric problems */ 1324 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1325 } else { 1326 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1327 for (i=0;i<n_vertices;i++) { 1328 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1329 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1330 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1331 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1332 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1333 } 1334 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1335 } 1336 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1337 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1338 if (n_constraints) { 1339 Mat B; 1340 1341 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1342 for (i=0;i<n_vertices;i++) { 1343 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1344 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1345 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1347 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1348 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1349 } 1350 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1351 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1352 ierr = MatDestroy(&B);CHKERRQ(ierr); 1353 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1354 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1355 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1356 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1357 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1358 ierr = MatDestroy(&B);CHKERRQ(ierr); 1359 } 1360 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1361 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1362 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1363 } 1364 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1365 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1366 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1367 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1368 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1369 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1370 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1371 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1372 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1373 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1374 } else { 1375 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1376 } 1377 for (i=0;i<pcbddc->benign_n;i++) { 1378 const PetscScalar *vals; 1379 const PetscInt *idxs; 1380 PetscInt n,j,primal_idx; 1381 1382 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1383 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1384 for (j=0;j<n;j++) { 1385 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1386 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1387 } 1388 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1389 } 1390 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1391 1392 /* coarse basis functions */ 1393 for (i=0;i<n_vertices;i++) { 1394 PetscScalar *y; 1395 1396 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1397 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1398 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1399 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1400 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1401 y[n_B*i+idx_V_B[i]] = 1.0; 1402 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1403 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1404 1405 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1406 PetscInt j; 1407 1408 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1409 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1410 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1411 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1412 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1413 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1414 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1415 } 1416 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1417 } 1418 /* if n_R == 0 the object is not destroyed */ 1419 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1420 } 1421 1422 if (n_constraints) { 1423 Mat B; 1424 1425 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1426 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1427 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1428 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1429 if (n_vertices) { 1430 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1431 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1432 } else { 1433 Mat S_VCt; 1434 1435 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1436 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1437 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1438 } 1439 } 1440 ierr = MatDestroy(&B);CHKERRQ(ierr); 1441 /* coarse basis functions */ 1442 for (i=0;i<n_constraints;i++) { 1443 PetscScalar *y; 1444 1445 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1446 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1447 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1448 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1449 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1451 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1452 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1453 PetscInt j; 1454 1455 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1456 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1457 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1459 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1460 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1461 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1462 } 1463 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1464 } 1465 } 1466 if (n_constraints) { 1467 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1468 } 1469 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1470 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1471 1472 /* compute other basis functions for non-symmetric problems */ 1473 if (!pcbddc->symmetric_primal) { 1474 1475 if (n_constraints) { 1476 Mat S_CCT,B_C; 1477 1478 /* this is a lazy thing */ 1479 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1480 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1481 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1482 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1483 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1484 if (n_vertices) { 1485 Mat B_V,S_VCT; 1486 1487 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1488 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1489 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1490 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1491 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1492 } 1493 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1494 } else { /* if there are no constraints, reset work */ 1495 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1496 } 1497 if (n_vertices && n_R) { 1498 Mat A_VRT; 1499 PetscScalar *marray; 1500 PetscBLASInt B_N,B_one = 1; 1501 1502 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1503 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1504 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1505 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1506 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1507 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1508 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1509 } 1510 1511 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1512 for (i=0;i<pcbddc->local_primal_size;i++) { 1513 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1514 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1515 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1517 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1518 } 1519 } else { 1520 for (i=0;i<pcbddc->local_primal_size;i++) { 1521 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1522 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1523 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1524 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1525 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1526 } 1527 } 1528 /* coarse basis functions */ 1529 for (i=0;i<pcbddc->local_primal_size;i++) { 1530 PetscScalar *y; 1531 1532 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1533 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1534 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1535 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 if (i<n_vertices) { 1538 y[n_B*i+idx_V_B[i]] = 1.0; 1539 } 1540 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1541 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1542 1543 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1544 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1545 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1546 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1549 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1550 } 1551 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1552 } 1553 } 1554 /* free memory */ 1555 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1556 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1557 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1558 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1559 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1560 ierr = PetscFree(work);CHKERRQ(ierr); 1561 if (n_vertices) { 1562 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1563 } 1564 if (n_constraints) { 1565 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1566 } 1567 /* Checking coarse_sub_mat and coarse basis functios */ 1568 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1569 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1570 if (pcbddc->dbg_flag) { 1571 Mat coarse_sub_mat; 1572 Mat AUXMAT,TM1,TM2,TM3,TM4; 1573 Mat coarse_phi_D,coarse_phi_B; 1574 Mat coarse_psi_D,coarse_psi_B; 1575 Mat A_II,A_BB,A_IB,A_BI; 1576 Mat C_B,CPHI; 1577 IS is_dummy; 1578 Vec mones; 1579 MatType checkmattype=MATSEQAIJ; 1580 PetscReal real_value; 1581 1582 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1583 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1584 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1585 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1586 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1587 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1588 if (unsymmetric_check) { 1589 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1590 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1591 } 1592 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1593 1594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1596 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1597 if (unsymmetric_check) { 1598 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1599 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1600 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1601 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1602 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1603 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1604 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1605 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1606 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1607 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1608 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1609 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1610 } else { 1611 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1612 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1613 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1614 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1615 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1616 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1617 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1618 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1619 } 1620 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1621 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1622 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1623 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1624 if (pcbddc->benign_n) { 1625 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1626 PetscScalar *data,*data2; 1627 PetscInt j; 1628 1629 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1630 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1631 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1632 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1633 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1634 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1635 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1636 for (j=0;j<pcbddc->benign_n;j++) { 1637 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1638 for (i=0;i<pcbddc->local_primal_size;i++) { 1639 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1640 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1641 } 1642 } 1643 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1644 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1645 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1646 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1647 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1648 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1649 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1650 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1651 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1652 } 1653 #if 0 1654 { 1655 PetscViewer viewer; 1656 char filename[256]; 1657 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1658 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1659 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1660 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1661 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1662 } 1663 #endif 1664 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1665 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1666 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1667 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1668 1669 /* check constraints */ 1670 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1671 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1672 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1673 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1674 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1675 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1676 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1677 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1678 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1679 if (unsymmetric_check) { 1680 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1681 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1682 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1683 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1685 } 1686 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1687 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1688 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1689 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1690 } 1691 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1692 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1693 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1694 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1695 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1696 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1697 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1698 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1699 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1700 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1701 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1702 if (unsymmetric_check) { 1703 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1704 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1705 } 1706 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1707 } 1708 /* get back data */ 1709 *coarse_submat_vals_n = coarse_submat_vals; 1710 PetscFunctionReturn(0); 1711 } 1712 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1715 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1716 { 1717 Mat *work_mat; 1718 IS isrow_s,iscol_s; 1719 PetscBool rsorted,csorted; 1720 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1725 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1726 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1727 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1728 1729 if (!rsorted) { 1730 const PetscInt *idxs; 1731 PetscInt *idxs_sorted,i; 1732 1733 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1734 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1735 for (i=0;i<rsize;i++) { 1736 idxs_perm_r[i] = i; 1737 } 1738 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1739 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1740 for (i=0;i<rsize;i++) { 1741 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1742 } 1743 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1744 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1745 } else { 1746 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1747 isrow_s = isrow; 1748 } 1749 1750 if (!csorted) { 1751 if (isrow == iscol) { 1752 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1753 iscol_s = isrow_s; 1754 } else { 1755 const PetscInt *idxs; 1756 PetscInt *idxs_sorted,i; 1757 1758 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1759 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1760 for (i=0;i<csize;i++) { 1761 idxs_perm_c[i] = i; 1762 } 1763 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1764 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1765 for (i=0;i<csize;i++) { 1766 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1767 } 1768 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1769 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1770 } 1771 } else { 1772 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1773 iscol_s = iscol; 1774 } 1775 1776 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1777 1778 if (!rsorted || !csorted) { 1779 Mat new_mat; 1780 IS is_perm_r,is_perm_c; 1781 1782 if (!rsorted) { 1783 PetscInt *idxs_r,i; 1784 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1785 for (i=0;i<rsize;i++) { 1786 idxs_r[idxs_perm_r[i]] = i; 1787 } 1788 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1789 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1790 } else { 1791 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1792 } 1793 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1794 1795 if (!csorted) { 1796 if (isrow_s == iscol_s) { 1797 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1798 is_perm_c = is_perm_r; 1799 } else { 1800 PetscInt *idxs_c,i; 1801 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1802 for (i=0;i<csize;i++) { 1803 idxs_c[idxs_perm_c[i]] = i; 1804 } 1805 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1806 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1807 } 1808 } else { 1809 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1810 } 1811 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1812 1813 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1814 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1815 work_mat[0] = new_mat; 1816 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1817 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1818 } 1819 1820 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1821 *B = work_mat[0]; 1822 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1823 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1824 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1825 PetscFunctionReturn(0); 1826 } 1827 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1830 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1831 { 1832 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1833 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1834 Mat new_mat; 1835 IS is_local,is_global; 1836 PetscInt local_size; 1837 PetscBool isseqaij; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1842 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1843 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1844 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1845 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1846 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1847 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1848 1849 /* check */ 1850 if (pcbddc->dbg_flag) { 1851 Vec x,x_change; 1852 PetscReal error; 1853 1854 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1855 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1856 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1857 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1863 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1866 ierr = VecDestroy(&x);CHKERRQ(ierr); 1867 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1868 } 1869 1870 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1871 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1872 if (isseqaij) { 1873 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1874 } else { 1875 Mat work_mat; 1876 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1877 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1878 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1879 } 1880 if (matis->A->symmetric_set) { 1881 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1882 #if !defined(PETSC_USE_COMPLEX) 1883 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1884 #endif 1885 } 1886 /* 1887 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1888 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1889 */ 1890 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1896 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1897 { 1898 PC_IS* pcis = (PC_IS*)(pc->data); 1899 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1900 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1901 PetscInt *idx_R_local=NULL; 1902 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1903 PetscInt vbs,bs; 1904 PetscBT bitmask=NULL; 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 /* 1909 No need to setup local scatters if 1910 - primal space is unchanged 1911 AND 1912 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1913 AND 1914 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1915 */ 1916 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1917 PetscFunctionReturn(0); 1918 } 1919 /* destroy old objects */ 1920 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1921 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1922 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1923 /* Set Non-overlapping dimensions */ 1924 n_B = pcis->n_B; 1925 n_D = pcis->n - n_B; 1926 n_vertices = pcbddc->n_vertices; 1927 1928 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1929 1930 /* create auxiliary bitmask and allocate workspace */ 1931 if (!sub_schurs->reuse_mumps) { 1932 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1933 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1934 for (i=0;i<n_vertices;i++) { 1935 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1936 } 1937 1938 for (i=0, n_R=0; i<pcis->n; i++) { 1939 if (!PetscBTLookup(bitmask,i)) { 1940 idx_R_local[n_R++] = i; 1941 } 1942 } 1943 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1944 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1945 1946 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1947 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1948 } 1949 1950 /* Block code */ 1951 vbs = 1; 1952 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1953 if (bs>1 && !(n_vertices%bs)) { 1954 PetscBool is_blocked = PETSC_TRUE; 1955 PetscInt *vary; 1956 if (!sub_schurs->reuse_mumps) { 1957 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1958 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1959 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1960 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1961 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1962 for (i=0; i<pcis->n/bs; i++) { 1963 if (vary[i]!=0 && vary[i]!=bs) { 1964 is_blocked = PETSC_FALSE; 1965 break; 1966 } 1967 } 1968 ierr = PetscFree(vary);CHKERRQ(ierr); 1969 } else { 1970 /* Verify directly the R set */ 1971 for (i=0; i<n_R/bs; i++) { 1972 PetscInt j,node=idx_R_local[bs*i]; 1973 for (j=1; j<bs; j++) { 1974 if (node != idx_R_local[bs*i+j]-j) { 1975 is_blocked = PETSC_FALSE; 1976 break; 1977 } 1978 } 1979 } 1980 } 1981 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1982 vbs = bs; 1983 for (i=0;i<n_R/vbs;i++) { 1984 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1985 } 1986 } 1987 } 1988 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1989 if (sub_schurs->reuse_mumps) { 1990 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1991 1992 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1993 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1994 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1995 reuse_mumps->is_R = pcbddc->is_R_local; 1996 } else { 1997 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1998 } 1999 2000 /* print some info if requested */ 2001 if (pcbddc->dbg_flag) { 2002 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2003 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2004 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2008 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2009 } 2010 2011 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2012 if (!sub_schurs->reuse_mumps) { 2013 IS is_aux1,is_aux2; 2014 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2015 2016 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2017 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2018 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2019 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2020 for (i=0; i<n_D; i++) { 2021 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2022 } 2023 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2024 for (i=0, j=0; i<n_R; i++) { 2025 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2026 aux_array1[j++] = i; 2027 } 2028 } 2029 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2030 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2031 for (i=0, j=0; i<n_B; i++) { 2032 if (!PetscBTLookup(bitmask,is_indices[i])) { 2033 aux_array2[j++] = i; 2034 } 2035 } 2036 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2037 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2038 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2039 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2040 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2041 2042 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2043 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2044 for (i=0, j=0; i<n_R; i++) { 2045 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2046 aux_array1[j++] = i; 2047 } 2048 } 2049 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2050 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2051 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2052 } 2053 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2054 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2055 } else { 2056 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2057 IS tis; 2058 PetscInt schur_size; 2059 2060 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2061 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2062 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2063 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2064 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2065 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2066 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2067 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2068 } 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2076 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2077 { 2078 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2079 PC_IS *pcis = (PC_IS*)pc->data; 2080 PC pc_temp; 2081 Mat A_RR; 2082 MatReuse reuse; 2083 PetscScalar m_one = -1.0; 2084 PetscReal value; 2085 PetscInt n_D,n_R; 2086 PetscBool use_exact,use_exact_reduced,issbaij; 2087 PetscErrorCode ierr; 2088 /* prefixes stuff */ 2089 char dir_prefix[256],neu_prefix[256],str_level[16]; 2090 size_t len; 2091 2092 PetscFunctionBegin; 2093 2094 /* compute prefixes */ 2095 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2096 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2097 if (!pcbddc->current_level) { 2098 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2099 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2100 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2101 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2102 } else { 2103 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2104 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2105 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2106 len -= 15; /* remove "pc_bddc_coarse_" */ 2107 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2108 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2109 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2110 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2111 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2112 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2113 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2114 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2115 } 2116 2117 /* DIRICHLET PROBLEM */ 2118 if (dirichlet) { 2119 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2120 if (pcbddc->local_mat->symmetric_set) { 2121 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2122 } 2123 /* Matrix for Dirichlet problem is pcis->A_II */ 2124 n_D = pcis->n - pcis->n_B; 2125 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2128 /* default */ 2129 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2131 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2132 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2133 if (issbaij) { 2134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2135 } else { 2136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2137 } 2138 /* Allow user's customization */ 2139 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2140 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2141 } 2142 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2143 if (sub_schurs->reuse_mumps) { 2144 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2145 2146 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2147 } 2148 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2149 if (!n_D) { 2150 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2151 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2152 } 2153 /* Set Up KSP for Dirichlet problem of BDDC */ 2154 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2155 /* set ksp_D into pcis data */ 2156 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2157 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2158 pcis->ksp_D = pcbddc->ksp_D; 2159 } 2160 2161 /* NEUMANN PROBLEM */ 2162 A_RR = 0; 2163 if (neumann) { 2164 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2165 PetscInt ibs,mbs; 2166 PetscBool issbaij; 2167 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2168 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2169 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2170 if (pcbddc->ksp_R) { /* already created ksp */ 2171 PetscInt nn_R; 2172 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2173 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2174 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2175 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2176 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2177 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2178 reuse = MAT_INITIAL_MATRIX; 2179 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2180 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2181 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2182 reuse = MAT_INITIAL_MATRIX; 2183 } else { /* safe to reuse the matrix */ 2184 reuse = MAT_REUSE_MATRIX; 2185 } 2186 } 2187 /* last check */ 2188 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2189 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2190 reuse = MAT_INITIAL_MATRIX; 2191 } 2192 } else { /* first time, so we need to create the matrix */ 2193 reuse = MAT_INITIAL_MATRIX; 2194 } 2195 /* extract A_RR */ 2196 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2197 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2198 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2199 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2200 if (matis->A == pcbddc->local_mat) { 2201 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2202 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2203 } else { 2204 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2205 } 2206 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2207 if (matis->A == pcbddc->local_mat) { 2208 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2209 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2210 } else { 2211 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2212 } 2213 } 2214 if (!sub_schurs->reuse_mumps) { 2215 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2216 if (pcbddc->local_mat->symmetric_set) { 2217 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2218 } 2219 } else { 2220 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2221 2222 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2223 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2224 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2225 } 2226 if (!pcbddc->ksp_R) { /* create object if not present */ 2227 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2228 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2229 /* default */ 2230 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2231 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2232 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2233 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2234 if (issbaij) { 2235 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2236 } else { 2237 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2238 } 2239 /* Allow user's customization */ 2240 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2241 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2242 } 2243 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2244 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2245 if (!n_R) { 2246 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2247 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2248 } 2249 /* Reuse MUMPS solver if it is present */ 2250 if (sub_schurs->reuse_mumps) { 2251 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2252 2253 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2254 } 2255 /* Set Up KSP for Neumann problem of BDDC */ 2256 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2257 } 2258 /* free Neumann problem's matrix */ 2259 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2260 2261 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2262 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2263 if (pcbddc->dbg_flag) { 2264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2265 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2266 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2267 } 2268 if (dirichlet) { /* Dirichlet */ 2269 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2270 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2271 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2272 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2273 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2274 /* need to be adapted? */ 2275 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2276 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2277 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2278 /* print info */ 2279 if (pcbddc->dbg_flag) { 2280 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2281 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2282 } 2283 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2284 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2285 } 2286 } 2287 if (neumann) { /* Neumann */ 2288 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2289 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2290 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2291 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2292 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2293 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2294 /* need to be adapted? */ 2295 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2296 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2297 /* print info */ 2298 if (pcbddc->dbg_flag) { 2299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2300 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2301 } 2302 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2303 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2304 } 2305 } 2306 } 2307 PetscFunctionReturn(0); 2308 } 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2312 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2313 { 2314 PetscErrorCode ierr; 2315 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2316 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2317 2318 PetscFunctionBegin; 2319 if (!sub_schurs->reuse_mumps) { 2320 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2321 } 2322 if (!pcbddc->switch_static) { 2323 if (applytranspose && pcbddc->local_auxmat1) { 2324 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2325 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2326 } 2327 if (!sub_schurs->reuse_mumps) { 2328 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2329 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2330 } else { 2331 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2332 2333 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2334 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2335 } 2336 } else { 2337 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2338 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2339 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2340 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2341 if (applytranspose && pcbddc->local_auxmat1) { 2342 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2343 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2344 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2345 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2346 } 2347 } 2348 if (!sub_schurs->reuse_mumps) { 2349 if (applytranspose) { 2350 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2351 } else { 2352 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2353 } 2354 #if defined(PETSC_HAVE_MUMPS) 2355 } else { 2356 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2357 2358 if (applytranspose) { 2359 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2360 } else { 2361 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2362 } 2363 #endif 2364 } 2365 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2366 if (!pcbddc->switch_static) { 2367 if (!sub_schurs->reuse_mumps) { 2368 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2369 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2370 } else { 2371 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2372 2373 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2374 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2375 } 2376 if (!applytranspose && pcbddc->local_auxmat1) { 2377 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2378 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2379 } 2380 } else { 2381 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2382 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2383 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2384 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2385 if (!applytranspose && pcbddc->local_auxmat1) { 2386 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2387 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2388 } 2389 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2390 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2391 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2392 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2393 } 2394 PetscFunctionReturn(0); 2395 } 2396 2397 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2398 #undef __FUNCT__ 2399 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2400 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2401 { 2402 PetscErrorCode ierr; 2403 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2404 PC_IS* pcis = (PC_IS*) (pc->data); 2405 const PetscScalar zero = 0.0; 2406 2407 PetscFunctionBegin; 2408 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2409 if (applytranspose) { 2410 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2411 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2412 } else { 2413 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2414 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2415 } 2416 2417 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2418 if (pcbddc->benign_n) { 2419 PetscScalar *array; 2420 PetscInt j; 2421 2422 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2423 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2424 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2425 } 2426 2427 /* start communications from local primal nodes to rhs of coarse solver */ 2428 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2429 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2431 2432 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2433 /* TODO remove null space when doing multilevel */ 2434 if (pcbddc->coarse_ksp) { 2435 Mat coarse_mat; 2436 Vec rhs,sol; 2437 MatNullSpace nullsp; 2438 2439 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2440 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2441 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2442 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2443 if (nullsp) { 2444 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2445 } 2446 if (applytranspose) { 2447 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2448 } else { 2449 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2450 } 2451 if (nullsp) { 2452 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2453 } 2454 } 2455 2456 /* Local solution on R nodes */ 2457 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2458 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2459 } 2460 2461 /* communications from coarse sol to local primal nodes */ 2462 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2463 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2464 2465 /* Sum contributions from two levels */ 2466 if (applytranspose) { 2467 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2468 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2469 } else { 2470 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2471 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2472 } 2473 /* store p0 */ 2474 if (pcbddc->benign_n) { 2475 PetscScalar *array; 2476 PetscInt j; 2477 2478 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2479 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2480 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2481 } 2482 PetscFunctionReturn(0); 2483 } 2484 2485 #undef __FUNCT__ 2486 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2487 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2488 { 2489 PetscErrorCode ierr; 2490 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2491 PetscScalar *array; 2492 Vec from,to; 2493 2494 PetscFunctionBegin; 2495 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2496 from = pcbddc->coarse_vec; 2497 to = pcbddc->vec1_P; 2498 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2499 Vec tvec; 2500 2501 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2502 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2503 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2504 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2505 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2506 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2507 } 2508 } else { /* from local to global -> put data in coarse right hand side */ 2509 from = pcbddc->vec1_P; 2510 to = pcbddc->coarse_vec; 2511 } 2512 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2513 PetscFunctionReturn(0); 2514 } 2515 2516 #undef __FUNCT__ 2517 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2518 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2519 { 2520 PetscErrorCode ierr; 2521 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2522 PetscScalar *array; 2523 Vec from,to; 2524 2525 PetscFunctionBegin; 2526 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2527 from = pcbddc->coarse_vec; 2528 to = pcbddc->vec1_P; 2529 } else { /* from local to global -> put data in coarse right hand side */ 2530 from = pcbddc->vec1_P; 2531 to = pcbddc->coarse_vec; 2532 } 2533 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2534 if (smode == SCATTER_FORWARD) { 2535 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2536 Vec tvec; 2537 2538 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2539 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2540 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2541 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2542 } 2543 } else { 2544 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2545 ierr = VecResetArray(from);CHKERRQ(ierr); 2546 } 2547 } 2548 PetscFunctionReturn(0); 2549 } 2550 2551 /* uncomment for testing purposes */ 2552 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2555 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2556 { 2557 PetscErrorCode ierr; 2558 PC_IS* pcis = (PC_IS*)(pc->data); 2559 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2560 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2561 /* one and zero */ 2562 PetscScalar one=1.0,zero=0.0; 2563 /* space to store constraints and their local indices */ 2564 PetscScalar *constraints_data; 2565 PetscInt *constraints_idxs,*constraints_idxs_B; 2566 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2567 PetscInt *constraints_n; 2568 /* iterators */ 2569 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2570 /* BLAS integers */ 2571 PetscBLASInt lwork,lierr; 2572 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2573 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2574 /* reuse */ 2575 PetscInt olocal_primal_size,olocal_primal_size_cc; 2576 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2577 /* change of basis */ 2578 PetscBool qr_needed; 2579 PetscBT change_basis,qr_needed_idx; 2580 /* auxiliary stuff */ 2581 PetscInt *nnz,*is_indices; 2582 PetscInt ncc; 2583 /* some quantities */ 2584 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2585 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2586 2587 PetscFunctionBegin; 2588 /* Destroy Mat objects computed previously */ 2589 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2590 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2591 /* save info on constraints from previous setup (if any) */ 2592 olocal_primal_size = pcbddc->local_primal_size; 2593 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2594 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2595 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2596 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2597 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2598 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2599 2600 /* print some info */ 2601 if (pcbddc->dbg_flag) { 2602 IS vertices; 2603 PetscInt nv,nedges,nfaces; 2604 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2605 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2606 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2607 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2612 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2613 } 2614 2615 if (!pcbddc->adaptive_selection) { 2616 IS ISForVertices,*ISForFaces,*ISForEdges; 2617 MatNullSpace nearnullsp; 2618 const Vec *nearnullvecs; 2619 Vec *localnearnullsp; 2620 PetscScalar *array; 2621 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2622 PetscBool nnsp_has_cnst; 2623 /* LAPACK working arrays for SVD or POD */ 2624 PetscBool skip_lapack,boolforchange; 2625 PetscScalar *work; 2626 PetscReal *singular_vals; 2627 #if defined(PETSC_USE_COMPLEX) 2628 PetscReal *rwork; 2629 #endif 2630 #if defined(PETSC_MISSING_LAPACK_GESVD) 2631 PetscScalar *temp_basis,*correlation_mat; 2632 #else 2633 PetscBLASInt dummy_int=1; 2634 PetscScalar dummy_scalar=1.; 2635 #endif 2636 2637 /* Get index sets for faces, edges and vertices from graph */ 2638 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2639 /* free unneeded index sets */ 2640 if (!pcbddc->use_vertices) { 2641 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2642 } 2643 if (!pcbddc->use_edges) { 2644 for (i=0;i<n_ISForEdges;i++) { 2645 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2646 } 2647 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2648 n_ISForEdges = 0; 2649 } 2650 if (!pcbddc->use_faces) { 2651 for (i=0;i<n_ISForFaces;i++) { 2652 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2653 } 2654 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2655 n_ISForFaces = 0; 2656 } 2657 2658 #if defined(PETSC_USE_DEBUG) 2659 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2660 Also use_change_of_basis should be consistent among processors */ 2661 if (pcbddc->NullSpace) { 2662 PetscBool tbool[2],gbool[2]; 2663 2664 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2665 pcbddc->use_change_of_basis = PETSC_TRUE; 2666 if (!ISForEdges) { 2667 pcbddc->use_change_on_faces = PETSC_TRUE; 2668 } 2669 } 2670 tbool[0] = pcbddc->use_change_of_basis; 2671 tbool[1] = pcbddc->use_change_on_faces; 2672 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2673 pcbddc->use_change_of_basis = gbool[0]; 2674 pcbddc->use_change_on_faces = gbool[1]; 2675 } 2676 #endif 2677 2678 /* check if near null space is attached to global mat */ 2679 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2680 if (nearnullsp) { 2681 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2682 /* remove any stored info */ 2683 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2684 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2685 /* store information for BDDC solver reuse */ 2686 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2687 pcbddc->onearnullspace = nearnullsp; 2688 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2689 for (i=0;i<nnsp_size;i++) { 2690 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2691 } 2692 } else { /* if near null space is not provided BDDC uses constants by default */ 2693 nnsp_size = 0; 2694 nnsp_has_cnst = PETSC_TRUE; 2695 } 2696 /* get max number of constraints on a single cc */ 2697 max_constraints = nnsp_size; 2698 if (nnsp_has_cnst) max_constraints++; 2699 2700 /* 2701 Evaluate maximum storage size needed by the procedure 2702 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2703 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2704 There can be multiple constraints per connected component 2705 */ 2706 n_vertices = 0; 2707 if (ISForVertices) { 2708 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2709 } 2710 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2711 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2712 2713 total_counts = n_ISForFaces+n_ISForEdges; 2714 total_counts *= max_constraints; 2715 total_counts += n_vertices; 2716 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2717 2718 total_counts = 0; 2719 max_size_of_constraint = 0; 2720 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2721 IS used_is; 2722 if (i<n_ISForEdges) { 2723 used_is = ISForEdges[i]; 2724 } else { 2725 used_is = ISForFaces[i-n_ISForEdges]; 2726 } 2727 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2728 total_counts += j; 2729 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2730 } 2731 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2732 2733 /* get local part of global near null space vectors */ 2734 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2735 for (k=0;k<nnsp_size;k++) { 2736 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2737 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2738 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 } 2740 2741 /* whether or not to skip lapack calls */ 2742 skip_lapack = PETSC_TRUE; 2743 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2744 2745 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2746 if (!skip_lapack) { 2747 PetscScalar temp_work; 2748 2749 #if defined(PETSC_MISSING_LAPACK_GESVD) 2750 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2751 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2752 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2753 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2754 #if defined(PETSC_USE_COMPLEX) 2755 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2756 #endif 2757 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2758 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2759 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2760 lwork = -1; 2761 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2762 #if !defined(PETSC_USE_COMPLEX) 2763 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2764 #else 2765 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2766 #endif 2767 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2768 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2769 #else /* on missing GESVD */ 2770 /* SVD */ 2771 PetscInt max_n,min_n; 2772 max_n = max_size_of_constraint; 2773 min_n = max_constraints; 2774 if (max_size_of_constraint < max_constraints) { 2775 min_n = max_size_of_constraint; 2776 max_n = max_constraints; 2777 } 2778 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2779 #if defined(PETSC_USE_COMPLEX) 2780 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2781 #endif 2782 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2783 lwork = -1; 2784 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2785 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2786 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2787 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2788 #if !defined(PETSC_USE_COMPLEX) 2789 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2790 #else 2791 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2792 #endif 2793 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2794 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2795 #endif /* on missing GESVD */ 2796 /* Allocate optimal workspace */ 2797 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2798 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2799 } 2800 /* Now we can loop on constraining sets */ 2801 total_counts = 0; 2802 constraints_idxs_ptr[0] = 0; 2803 constraints_data_ptr[0] = 0; 2804 /* vertices */ 2805 if (n_vertices) { 2806 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2807 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2808 for (i=0;i<n_vertices;i++) { 2809 constraints_n[total_counts] = 1; 2810 constraints_data[total_counts] = 1.0; 2811 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2812 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2813 total_counts++; 2814 } 2815 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2816 n_vertices = total_counts; 2817 } 2818 2819 /* edges and faces */ 2820 total_counts_cc = total_counts; 2821 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2822 IS used_is; 2823 PetscBool idxs_copied = PETSC_FALSE; 2824 2825 if (ncc<n_ISForEdges) { 2826 used_is = ISForEdges[ncc]; 2827 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2828 } else { 2829 used_is = ISForFaces[ncc-n_ISForEdges]; 2830 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2831 } 2832 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2833 2834 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2835 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2836 /* change of basis should not be performed on local periodic nodes */ 2837 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2838 if (nnsp_has_cnst) { 2839 PetscScalar quad_value; 2840 2841 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2842 idxs_copied = PETSC_TRUE; 2843 2844 if (!pcbddc->use_nnsp_true) { 2845 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2846 } else { 2847 quad_value = 1.0; 2848 } 2849 for (j=0;j<size_of_constraint;j++) { 2850 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2851 } 2852 temp_constraints++; 2853 total_counts++; 2854 } 2855 for (k=0;k<nnsp_size;k++) { 2856 PetscReal real_value; 2857 PetscScalar *ptr_to_data; 2858 2859 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2860 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2861 for (j=0;j<size_of_constraint;j++) { 2862 ptr_to_data[j] = array[is_indices[j]]; 2863 } 2864 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2865 /* check if array is null on the connected component */ 2866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2867 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2868 if (real_value > 0.0) { /* keep indices and values */ 2869 temp_constraints++; 2870 total_counts++; 2871 if (!idxs_copied) { 2872 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2873 idxs_copied = PETSC_TRUE; 2874 } 2875 } 2876 } 2877 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2878 valid_constraints = temp_constraints; 2879 if (!pcbddc->use_nnsp_true && temp_constraints) { 2880 if (temp_constraints == 1) { /* just normalize the constraint */ 2881 PetscScalar norm,*ptr_to_data; 2882 2883 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2885 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2886 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2887 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2888 } else { /* perform SVD */ 2889 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2890 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2891 2892 #if defined(PETSC_MISSING_LAPACK_GESVD) 2893 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2894 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2895 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2896 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2897 from that computed using LAPACKgesvd 2898 -> This is due to a different computation of eigenvectors in LAPACKheev 2899 -> The quality of the POD-computed basis will be the same */ 2900 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2901 /* Store upper triangular part of correlation matrix */ 2902 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2904 for (j=0;j<temp_constraints;j++) { 2905 for (k=0;k<j+1;k++) { 2906 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2907 } 2908 } 2909 /* compute eigenvalues and eigenvectors of correlation matrix */ 2910 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2912 #if !defined(PETSC_USE_COMPLEX) 2913 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2914 #else 2915 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2916 #endif 2917 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2918 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2919 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2920 j = 0; 2921 while (j < temp_constraints && singular_vals[j] < tol) j++; 2922 total_counts = total_counts-j; 2923 valid_constraints = temp_constraints-j; 2924 /* scale and copy POD basis into used quadrature memory */ 2925 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2926 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2927 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2928 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2929 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2931 if (j<temp_constraints) { 2932 PetscInt ii; 2933 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2934 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2935 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2937 for (k=0;k<temp_constraints-j;k++) { 2938 for (ii=0;ii<size_of_constraint;ii++) { 2939 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2940 } 2941 } 2942 } 2943 #else /* on missing GESVD */ 2944 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2945 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2946 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2947 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2948 #if !defined(PETSC_USE_COMPLEX) 2949 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2950 #else 2951 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2952 #endif 2953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2954 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2955 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2956 k = temp_constraints; 2957 if (k > size_of_constraint) k = size_of_constraint; 2958 j = 0; 2959 while (j < k && singular_vals[k-j-1] < tol) j++; 2960 valid_constraints = k-j; 2961 total_counts = total_counts-temp_constraints+valid_constraints; 2962 #endif /* on missing GESVD */ 2963 } 2964 } 2965 /* update pointers information */ 2966 if (valid_constraints) { 2967 constraints_n[total_counts_cc] = valid_constraints; 2968 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2969 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2970 /* set change_of_basis flag */ 2971 if (boolforchange) { 2972 PetscBTSet(change_basis,total_counts_cc); 2973 } 2974 total_counts_cc++; 2975 } 2976 } 2977 /* free workspace */ 2978 if (!skip_lapack) { 2979 ierr = PetscFree(work);CHKERRQ(ierr); 2980 #if defined(PETSC_USE_COMPLEX) 2981 ierr = PetscFree(rwork);CHKERRQ(ierr); 2982 #endif 2983 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2984 #if defined(PETSC_MISSING_LAPACK_GESVD) 2985 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2986 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2987 #endif 2988 } 2989 for (k=0;k<nnsp_size;k++) { 2990 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2991 } 2992 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2993 /* free index sets of faces, edges and vertices */ 2994 for (i=0;i<n_ISForFaces;i++) { 2995 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2996 } 2997 if (n_ISForFaces) { 2998 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2999 } 3000 for (i=0;i<n_ISForEdges;i++) { 3001 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3002 } 3003 if (n_ISForEdges) { 3004 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3005 } 3006 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3007 } else { 3008 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3009 3010 total_counts = 0; 3011 n_vertices = 0; 3012 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3013 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3014 } 3015 max_constraints = 0; 3016 total_counts_cc = 0; 3017 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3018 total_counts += pcbddc->adaptive_constraints_n[i]; 3019 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3020 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3021 } 3022 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3023 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3024 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3025 constraints_data = pcbddc->adaptive_constraints_data; 3026 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3027 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3028 total_counts_cc = 0; 3029 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3030 if (pcbddc->adaptive_constraints_n[i]) { 3031 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3032 } 3033 } 3034 #if 0 3035 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3036 for (i=0;i<total_counts_cc;i++) { 3037 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3038 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3039 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3040 printf(" %d",constraints_idxs[j]); 3041 } 3042 printf("\n"); 3043 printf("number of cc: %d\n",constraints_n[i]); 3044 } 3045 for (i=0;i<n_vertices;i++) { 3046 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3047 } 3048 for (i=0;i<sub_schurs->n_subs;i++) { 3049 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3050 } 3051 #endif 3052 3053 max_size_of_constraint = 0; 3054 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3055 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3056 /* Change of basis */ 3057 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3058 if (pcbddc->use_change_of_basis) { 3059 for (i=0;i<sub_schurs->n_subs;i++) { 3060 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3061 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3062 } 3063 } 3064 } 3065 } 3066 pcbddc->local_primal_size = total_counts; 3067 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3068 3069 /* map constraints_idxs in boundary numbering */ 3070 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3071 if (i != constraints_idxs_ptr[total_counts_cc]) { 3072 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3073 } 3074 3075 /* Create constraint matrix */ 3076 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3077 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3078 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3079 3080 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3081 /* determine if a QR strategy is needed for change of basis */ 3082 qr_needed = PETSC_FALSE; 3083 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3084 total_primal_vertices=0; 3085 pcbddc->local_primal_size_cc = 0; 3086 for (i=0;i<total_counts_cc;i++) { 3087 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3088 if (size_of_constraint == 1) { 3089 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3090 pcbddc->local_primal_size_cc += 1; 3091 } else if (PetscBTLookup(change_basis,i)) { 3092 for (k=0;k<constraints_n[i];k++) { 3093 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3094 } 3095 pcbddc->local_primal_size_cc += constraints_n[i]; 3096 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3097 PetscBTSet(qr_needed_idx,i); 3098 qr_needed = PETSC_TRUE; 3099 } 3100 } else { 3101 pcbddc->local_primal_size_cc += 1; 3102 } 3103 } 3104 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3105 pcbddc->n_vertices = total_primal_vertices; 3106 /* permute indices in order to have a sorted set of vertices */ 3107 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3108 3109 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3110 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3111 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3112 3113 /* nonzero structure of constraint matrix */ 3114 /* and get reference dof for local constraints */ 3115 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3116 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3117 3118 j = total_primal_vertices; 3119 total_counts = total_primal_vertices; 3120 cum = total_primal_vertices; 3121 for (i=n_vertices;i<total_counts_cc;i++) { 3122 if (!PetscBTLookup(change_basis,i)) { 3123 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3124 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3125 cum++; 3126 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3127 for (k=0;k<constraints_n[i];k++) { 3128 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3129 nnz[j+k] = size_of_constraint; 3130 } 3131 j += constraints_n[i]; 3132 } 3133 } 3134 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3135 ierr = PetscFree(nnz);CHKERRQ(ierr); 3136 3137 /* set values in constraint matrix */ 3138 for (i=0;i<total_primal_vertices;i++) { 3139 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3140 } 3141 total_counts = total_primal_vertices; 3142 for (i=n_vertices;i<total_counts_cc;i++) { 3143 if (!PetscBTLookup(change_basis,i)) { 3144 PetscInt *cols; 3145 3146 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3147 cols = constraints_idxs+constraints_idxs_ptr[i]; 3148 for (k=0;k<constraints_n[i];k++) { 3149 PetscInt row = total_counts+k; 3150 PetscScalar *vals; 3151 3152 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3153 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3154 } 3155 total_counts += constraints_n[i]; 3156 } 3157 } 3158 /* assembling */ 3159 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3160 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 3162 /* 3163 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3164 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3165 */ 3166 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3167 if (pcbddc->use_change_of_basis) { 3168 /* dual and primal dofs on a single cc */ 3169 PetscInt dual_dofs,primal_dofs; 3170 /* working stuff for GEQRF */ 3171 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3172 PetscBLASInt lqr_work; 3173 /* working stuff for UNGQR */ 3174 PetscScalar *gqr_work,lgqr_work_t; 3175 PetscBLASInt lgqr_work; 3176 /* working stuff for TRTRS */ 3177 PetscScalar *trs_rhs; 3178 PetscBLASInt Blas_NRHS; 3179 /* pointers for values insertion into change of basis matrix */ 3180 PetscInt *start_rows,*start_cols; 3181 PetscScalar *start_vals; 3182 /* working stuff for values insertion */ 3183 PetscBT is_primal; 3184 PetscInt *aux_primal_numbering_B; 3185 /* matrix sizes */ 3186 PetscInt global_size,local_size; 3187 /* temporary change of basis */ 3188 Mat localChangeOfBasisMatrix; 3189 /* extra space for debugging */ 3190 PetscScalar *dbg_work; 3191 3192 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3193 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3194 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3195 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3196 /* nonzeros for local mat */ 3197 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3198 for (i=0;i<pcis->n;i++) nnz[i]=1; 3199 for (i=n_vertices;i<total_counts_cc;i++) { 3200 if (PetscBTLookup(change_basis,i)) { 3201 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3202 if (PetscBTLookup(qr_needed_idx,i)) { 3203 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3204 } else { 3205 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3206 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3207 } 3208 } 3209 } 3210 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3211 ierr = PetscFree(nnz);CHKERRQ(ierr); 3212 /* Set initial identity in the matrix */ 3213 for (i=0;i<pcis->n;i++) { 3214 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3215 } 3216 3217 if (pcbddc->dbg_flag) { 3218 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3220 } 3221 3222 3223 /* Now we loop on the constraints which need a change of basis */ 3224 /* 3225 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3226 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3227 3228 Basic blocks of change of basis matrix T computed by 3229 3230 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3231 3232 | 1 0 ... 0 s_1/S | 3233 | 0 1 ... 0 s_2/S | 3234 | ... | 3235 | 0 ... 1 s_{n-1}/S | 3236 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3237 3238 with S = \sum_{i=1}^n s_i^2 3239 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3240 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3241 3242 - QR decomposition of constraints otherwise 3243 */ 3244 if (qr_needed) { 3245 /* space to store Q */ 3246 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3247 /* first we issue queries for optimal work */ 3248 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3249 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3250 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3251 lqr_work = -1; 3252 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3253 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3254 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3255 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3256 lgqr_work = -1; 3257 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3258 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3259 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3260 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3261 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3262 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3263 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3264 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3265 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3266 /* array to store scaling factors for reflectors */ 3267 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3268 /* array to store rhs and solution of triangular solver */ 3269 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3270 /* allocating workspace for check */ 3271 if (pcbddc->dbg_flag) { 3272 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3273 } 3274 } 3275 /* array to store whether a node is primal or not */ 3276 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3277 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3278 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3279 if (i != total_primal_vertices) { 3280 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3281 } 3282 for (i=0;i<total_primal_vertices;i++) { 3283 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3284 } 3285 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3286 3287 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3288 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3289 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3290 if (PetscBTLookup(change_basis,total_counts)) { 3291 /* get constraint info */ 3292 primal_dofs = constraints_n[total_counts]; 3293 dual_dofs = size_of_constraint-primal_dofs; 3294 3295 if (pcbddc->dbg_flag) { 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3297 } 3298 3299 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3300 3301 /* copy quadrature constraints for change of basis check */ 3302 if (pcbddc->dbg_flag) { 3303 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3304 } 3305 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3306 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3307 3308 /* compute QR decomposition of constraints */ 3309 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3310 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3311 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3312 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3313 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3314 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3315 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3316 3317 /* explictly compute R^-T */ 3318 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3319 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3320 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3321 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3322 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3323 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3324 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3325 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3326 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3328 3329 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3332 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3335 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3336 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3337 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3338 3339 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3340 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3341 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3342 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3343 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3344 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3345 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3346 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3347 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3348 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3349 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3350 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3351 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3352 3353 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3354 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3355 /* insert cols for primal dofs */ 3356 for (j=0;j<primal_dofs;j++) { 3357 start_vals = &qr_basis[j*size_of_constraint]; 3358 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3359 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3360 } 3361 /* insert cols for dual dofs */ 3362 for (j=0,k=0;j<dual_dofs;k++) { 3363 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3364 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3365 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3366 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3367 j++; 3368 } 3369 } 3370 3371 /* check change of basis */ 3372 if (pcbddc->dbg_flag) { 3373 PetscInt ii,jj; 3374 PetscBool valid_qr=PETSC_TRUE; 3375 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3376 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3379 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3380 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3381 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3382 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3383 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3384 for (jj=0;jj<size_of_constraint;jj++) { 3385 for (ii=0;ii<primal_dofs;ii++) { 3386 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3387 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3388 } 3389 } 3390 if (!valid_qr) { 3391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3392 for (jj=0;jj<size_of_constraint;jj++) { 3393 for (ii=0;ii<primal_dofs;ii++) { 3394 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3395 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3396 } 3397 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3398 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3399 } 3400 } 3401 } 3402 } else { 3403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3404 } 3405 } 3406 } else { /* simple transformation block */ 3407 PetscInt row,col; 3408 PetscScalar val,norm; 3409 3410 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3411 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3412 for (j=0;j<size_of_constraint;j++) { 3413 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3414 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3415 if (!PetscBTLookup(is_primal,row_B)) { 3416 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3417 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3418 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3419 } else { 3420 for (k=0;k<size_of_constraint;k++) { 3421 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3422 if (row != col) { 3423 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3424 } else { 3425 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3426 } 3427 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3428 } 3429 } 3430 } 3431 if (pcbddc->dbg_flag) { 3432 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3433 } 3434 } 3435 } else { 3436 if (pcbddc->dbg_flag) { 3437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3438 } 3439 } 3440 } 3441 3442 /* free workspace */ 3443 if (qr_needed) { 3444 if (pcbddc->dbg_flag) { 3445 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3446 } 3447 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3448 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3449 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3450 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3451 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3452 } 3453 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3454 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3455 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3456 3457 /* assembling of global change of variable */ 3458 { 3459 Mat tmat; 3460 PetscInt bs; 3461 3462 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3463 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3464 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3465 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3466 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3467 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3468 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3469 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3470 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3471 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3472 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3473 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3474 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3475 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3476 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3477 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3478 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3479 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3480 } 3481 /* check */ 3482 if (pcbddc->dbg_flag) { 3483 PetscReal error; 3484 Vec x,x_change; 3485 3486 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3487 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3488 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3489 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3490 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3491 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3492 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3493 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3494 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3495 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3496 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3497 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3500 ierr = VecDestroy(&x);CHKERRQ(ierr); 3501 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3502 } 3503 3504 /* adapt sub_schurs computed (if any) */ 3505 if (pcbddc->use_deluxe_scaling) { 3506 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3507 if (sub_schurs->S_Ej_all) { 3508 Mat S_new,tmat; 3509 ISLocalToGlobalMapping NtoSall; 3510 IS is_all_N,is_V,is_V_Sall; 3511 const PetscScalar *array; 3512 const PetscInt *idxs_V,*idxs_all; 3513 PetscInt i,n_V; 3514 3515 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3516 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3517 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3518 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3519 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3520 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3521 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3522 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3523 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3524 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3525 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3526 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3527 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3528 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3529 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3530 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3531 for (i=0;i<n_V;i++) { 3532 PetscScalar val; 3533 PetscInt idx; 3534 3535 idx = idxs_V[i]; 3536 val = array[idxs_all[idxs_V[i]]]; 3537 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3538 } 3539 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3540 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3541 sub_schurs->S_Ej_all = S_new; 3542 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3543 if (sub_schurs->sum_S_Ej_all) { 3544 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3545 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3546 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3547 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3548 sub_schurs->sum_S_Ej_all = S_new; 3549 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3550 } 3551 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3552 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3553 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3554 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3555 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3556 } 3557 } 3558 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3559 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3560 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3561 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3562 } 3563 3564 /* set up change of basis context */ 3565 if (pcbddc->ChangeOfBasisMatrix) { 3566 PCBDDCChange_ctx change_ctx; 3567 3568 if (!pcbddc->new_global_mat) { 3569 PetscInt global_size,local_size; 3570 3571 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3572 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3573 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3574 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3575 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3576 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3577 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3578 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3579 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3580 } else { 3581 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3582 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3583 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3584 } 3585 if (!pcbddc->user_ChangeOfBasisMatrix) { 3586 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3587 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3588 } else { 3589 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3590 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3591 } 3592 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3593 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3594 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3595 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3596 } 3597 3598 /* add pressure dofs to set of primal nodes for numbering purposes */ 3599 for (i=0;i<pcbddc->benign_n;i++) { 3600 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3601 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3602 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3603 pcbddc->local_primal_size_cc++; 3604 pcbddc->local_primal_size++; 3605 } 3606 3607 /* check if a new primal space has been introduced (also take into account benign trick) */ 3608 pcbddc->new_primal_space_local = PETSC_TRUE; 3609 if (olocal_primal_size == pcbddc->local_primal_size) { 3610 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3611 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3612 if (!pcbddc->new_primal_space_local) { 3613 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3614 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3615 } 3616 } 3617 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3618 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3619 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3620 3621 /* flush dbg viewer */ 3622 if (pcbddc->dbg_flag) { 3623 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3624 } 3625 3626 /* free workspace */ 3627 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3628 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3629 if (!pcbddc->adaptive_selection) { 3630 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3631 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3632 } else { 3633 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3634 pcbddc->adaptive_constraints_idxs_ptr, 3635 pcbddc->adaptive_constraints_data_ptr, 3636 pcbddc->adaptive_constraints_idxs, 3637 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3638 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3639 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3640 } 3641 PetscFunctionReturn(0); 3642 } 3643 3644 #undef __FUNCT__ 3645 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3646 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3647 { 3648 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3649 PC_IS *pcis = (PC_IS*)pc->data; 3650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3651 PetscInt ierr,i,vertex_size,N; 3652 PetscViewer viewer=pcbddc->dbg_viewer; 3653 3654 PetscFunctionBegin; 3655 /* Reset previously computed graph */ 3656 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3657 /* Init local Graph struct */ 3658 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3659 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3660 3661 /* Check validity of the csr graph passed in by the user */ 3662 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3663 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3664 } 3665 3666 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3667 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3668 PetscInt *xadj,*adjncy; 3669 PetscInt nvtxs; 3670 PetscBool flg_row=PETSC_FALSE; 3671 3672 if (pcbddc->use_local_adj) { 3673 3674 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3675 if (flg_row) { 3676 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3677 pcbddc->computed_rowadj = PETSC_TRUE; 3678 } 3679 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3680 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3681 IS is_dummy; 3682 ISLocalToGlobalMapping l2gmap_dummy; 3683 PetscInt j,sum; 3684 PetscInt *cxadj,*cadjncy; 3685 const PetscInt *idxs; 3686 PCBDDCGraph graph; 3687 PetscBT is_on_boundary; 3688 3689 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3690 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3691 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3692 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3693 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3694 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3695 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3696 if (flg_row) { 3697 graph->xadj = xadj; 3698 graph->adjncy = adjncy; 3699 } 3700 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3701 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3702 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3703 3704 if (pcbddc->dbg_flag) { 3705 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3706 for (i=0;i<graph->ncc;i++) { 3707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3708 } 3709 } 3710 3711 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3712 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3713 for (i=0;i<pcis->n_B;i++) { 3714 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3715 } 3716 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3717 3718 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3719 sum = 0; 3720 for (i=0;i<graph->ncc;i++) { 3721 PetscInt sizecc = 0; 3722 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3723 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3724 sizecc++; 3725 } 3726 } 3727 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3728 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3729 cxadj[graph->queue[j]] = sizecc; 3730 } 3731 } 3732 sum += sizecc*sizecc; 3733 } 3734 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3735 sum = 0; 3736 for (i=0;i<pcis->n;i++) { 3737 PetscInt temp = cxadj[i]; 3738 cxadj[i] = sum; 3739 sum += temp; 3740 } 3741 cxadj[pcis->n] = sum; 3742 for (i=0;i<graph->ncc;i++) { 3743 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3744 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3745 PetscInt k,sizecc = 0; 3746 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3747 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3748 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3749 sizecc++; 3750 } 3751 } 3752 } 3753 } 3754 } 3755 if (sum) { 3756 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3757 } else { 3758 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3759 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3760 } 3761 graph->xadj = 0; 3762 graph->adjncy = 0; 3763 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3764 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3765 } 3766 } 3767 if (pcbddc->dbg_flag) { 3768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3769 } 3770 3771 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3772 vertex_size = 1; 3773 if (pcbddc->user_provided_isfordofs) { 3774 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3775 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3776 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3777 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3778 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3779 } 3780 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3781 pcbddc->n_ISForDofs = 0; 3782 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3783 } 3784 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3785 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3786 } else { 3787 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3788 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3789 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3790 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3791 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3792 } 3793 } 3794 } 3795 3796 /* Setup of Graph */ 3797 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3798 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3799 } 3800 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3801 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3802 } 3803 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3804 3805 /* attach info on disconnected subdomains if present */ 3806 if (pcbddc->n_local_subs) { 3807 PetscInt *local_subs; 3808 3809 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3810 for (i=0;i<pcbddc->n_local_subs;i++) { 3811 const PetscInt *idxs; 3812 PetscInt nl,j; 3813 3814 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3815 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3816 for (j=0;j<nl;j++) { 3817 local_subs[idxs[j]] = i; 3818 } 3819 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3820 } 3821 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3822 pcbddc->mat_graph->local_subs = local_subs; 3823 } 3824 3825 /* Graph's connected components analysis */ 3826 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3827 3828 /* print some info to stdout */ 3829 if (pcbddc->dbg_flag) { 3830 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3831 } 3832 3833 /* mark topography has done */ 3834 pcbddc->recompute_topography = PETSC_FALSE; 3835 PetscFunctionReturn(0); 3836 } 3837 3838 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3839 #undef __FUNCT__ 3840 #define __FUNCT__ "PCBDDCSubsetNumbering" 3841 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3842 { 3843 PetscSF sf; 3844 PetscLayout map; 3845 const PetscInt *idxs; 3846 PetscInt *leaf_data,*root_data,*gidxs; 3847 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3848 PetscInt n_n,nlocals,start,first_index; 3849 PetscMPIInt commsize; 3850 PetscBool first_found; 3851 PetscErrorCode ierr; 3852 3853 PetscFunctionBegin; 3854 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3855 if (subset_mult) { 3856 PetscCheckSameComm(subset,1,subset_mult,2); 3857 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3858 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3859 } 3860 /* create workspace layout for computing global indices of subset */ 3861 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3862 lbounds[0] = lbounds[1] = 0; 3863 for (i=0;i<n;i++) { 3864 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3865 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3866 } 3867 lbounds[0] = -lbounds[0]; 3868 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3869 gbounds[0] = -gbounds[0]; 3870 N = gbounds[1] - gbounds[0] + 1; 3871 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3872 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3873 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3874 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3875 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3876 3877 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3878 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3879 if (subset_mult) { 3880 const PetscInt* idxs_mult; 3881 3882 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3883 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3884 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3885 } else { 3886 for (i=0;i<n;i++) leaf_data[i] = 1; 3887 } 3888 /* local size of new subset */ 3889 n_n = 0; 3890 for (i=0;i<n;i++) n_n += leaf_data[i]; 3891 3892 /* global indexes in layout */ 3893 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3894 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3895 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3896 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3897 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3898 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3899 3900 /* reduce from leaves to roots */ 3901 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3902 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3903 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3904 3905 /* count indexes in local part of layout */ 3906 nlocals = 0; 3907 first_index = -1; 3908 first_found = PETSC_FALSE; 3909 for (i=0;i<Nl;i++) { 3910 if (!first_found && root_data[i]) { 3911 first_found = PETSC_TRUE; 3912 first_index = i; 3913 } 3914 nlocals += root_data[i]; 3915 } 3916 3917 /* cumulative of number of indexes and size of subset without holes */ 3918 #if defined(PETSC_HAVE_MPI_EXSCAN) 3919 start = 0; 3920 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3921 #else 3922 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3923 start = start-nlocals; 3924 #endif 3925 3926 if (N_n) { /* compute total size of new subset if requested */ 3927 *N_n = start + nlocals; 3928 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3929 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3930 } 3931 3932 /* adapt root data with cumulative */ 3933 if (first_found) { 3934 PetscInt old_index; 3935 3936 root_data[first_index] += start; 3937 old_index = first_index; 3938 for (i=first_index+1;i<Nl;i++) { 3939 if (root_data[i]) { 3940 root_data[i] += root_data[old_index]; 3941 old_index = i; 3942 } 3943 } 3944 } 3945 3946 /* from roots to leaves */ 3947 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3948 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3949 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3950 3951 /* create new IS with global indexes without holes */ 3952 if (subset_mult) { 3953 const PetscInt* idxs_mult; 3954 PetscInt cum; 3955 3956 cum = 0; 3957 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3958 for (i=0;i<n;i++) { 3959 PetscInt j; 3960 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3961 } 3962 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3963 } else { 3964 for (i=0;i<n;i++) { 3965 gidxs[i] = leaf_data[i]-1; 3966 } 3967 } 3968 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3969 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3975 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3976 { 3977 PetscInt i,j; 3978 PetscScalar *alphas; 3979 PetscErrorCode ierr; 3980 3981 PetscFunctionBegin; 3982 /* this implements stabilized Gram-Schmidt */ 3983 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3984 for (i=0;i<n;i++) { 3985 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3986 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3987 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3988 } 3989 ierr = PetscFree(alphas);CHKERRQ(ierr); 3990 PetscFunctionReturn(0); 3991 } 3992 3993 #undef __FUNCT__ 3994 #define __FUNCT__ "MatISGetSubassemblingPattern" 3995 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3996 { 3997 IS ranks_send_to; 3998 PetscInt n_neighs,*neighs,*n_shared,**shared; 3999 PetscMPIInt size,rank,color; 4000 PetscInt *xadj,*adjncy; 4001 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4002 PetscInt i,local_size,threshold=0; 4003 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4004 PetscSubcomm subcomm; 4005 PetscErrorCode ierr; 4006 4007 PetscFunctionBegin; 4008 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4009 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4010 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4011 4012 /* Get info on mapping */ 4013 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4014 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4015 4016 /* build local CSR graph of subdomains' connectivity */ 4017 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4018 xadj[0] = 0; 4019 xadj[1] = PetscMax(n_neighs-1,0); 4020 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4021 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4022 4023 if (threshold) { 4024 PetscInt xadj_count = 0; 4025 for (i=1;i<n_neighs;i++) { 4026 if (n_shared[i] > threshold) { 4027 adjncy[xadj_count] = neighs[i]; 4028 adjncy_wgt[xadj_count] = n_shared[i]; 4029 xadj_count++; 4030 } 4031 } 4032 xadj[1] = xadj_count; 4033 } else { 4034 if (xadj[1]) { 4035 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4036 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4037 } 4038 } 4039 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4040 if (use_square) { 4041 for (i=0;i<xadj[1];i++) { 4042 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4043 } 4044 } 4045 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4046 4047 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4048 4049 /* 4050 Restrict work on active processes only. 4051 */ 4052 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4053 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4054 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4055 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4056 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4057 if (color) { 4058 ierr = PetscFree(xadj);CHKERRQ(ierr); 4059 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4060 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4061 } else { 4062 Mat subdomain_adj; 4063 IS new_ranks,new_ranks_contig; 4064 MatPartitioning partitioner; 4065 PetscInt prank,rstart=0,rend=0; 4066 PetscInt *is_indices,*oldranks; 4067 PetscBool aggregate; 4068 4069 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4070 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4071 prank = rank; 4072 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4073 /* 4074 for (i=0;i<size;i++) { 4075 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4076 } 4077 */ 4078 for (i=0;i<xadj[1];i++) { 4079 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4080 } 4081 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4082 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4083 if (aggregate) { 4084 PetscInt lrows,row,ncols,*cols; 4085 PetscMPIInt nrank; 4086 PetscScalar *vals; 4087 4088 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4089 lrows = 0; 4090 if (nrank<redprocs) { 4091 lrows = size/redprocs; 4092 if (nrank<size%redprocs) lrows++; 4093 } 4094 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4095 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4096 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4097 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4098 row = nrank; 4099 ncols = xadj[1]-xadj[0]; 4100 cols = adjncy; 4101 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4102 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4103 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4104 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4105 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4106 ierr = PetscFree(xadj);CHKERRQ(ierr); 4107 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4108 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4109 ierr = PetscFree(vals);CHKERRQ(ierr); 4110 } else { 4111 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4112 } 4113 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4114 4115 /* Partition */ 4116 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4117 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4118 if (use_vwgt) { 4119 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4120 v_wgt[0] = local_size; 4121 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4122 } 4123 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4124 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4125 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4126 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4127 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4128 4129 /* renumber new_ranks to avoid "holes" in new set of processors */ 4130 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4131 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4132 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4133 if (!redprocs) { 4134 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4135 } else { 4136 PetscInt idxs[1]; 4137 PetscMPIInt tag; 4138 MPI_Request *reqs; 4139 4140 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4141 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4142 for (i=rstart;i<rend;i++) { 4143 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4144 } 4145 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4146 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4147 ierr = PetscFree(reqs);CHKERRQ(ierr); 4148 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4149 } 4150 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4151 /* clean up */ 4152 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4153 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4154 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4155 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4156 } 4157 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4158 4159 /* assemble parallel IS for sends */ 4160 i = 1; 4161 if (color) i=0; 4162 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4163 /* get back IS */ 4164 *is_sends = ranks_send_to; 4165 PetscFunctionReturn(0); 4166 } 4167 4168 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4169 4170 #undef __FUNCT__ 4171 #define __FUNCT__ "MatISSubassemble" 4172 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4173 { 4174 Mat local_mat; 4175 IS is_sends_internal; 4176 PetscInt rows,cols,new_local_rows; 4177 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4178 PetscBool ismatis,isdense,newisdense,destroy_mat; 4179 ISLocalToGlobalMapping l2gmap; 4180 PetscInt* l2gmap_indices; 4181 const PetscInt* is_indices; 4182 MatType new_local_type; 4183 /* buffers */ 4184 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4185 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4186 PetscInt *recv_buffer_idxs_local; 4187 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4188 /* MPI */ 4189 MPI_Comm comm,comm_n; 4190 PetscSubcomm subcomm; 4191 PetscMPIInt n_sends,n_recvs,commsize; 4192 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4193 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4194 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4195 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4196 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4197 PetscErrorCode ierr; 4198 4199 PetscFunctionBegin; 4200 /* TODO: add missing checks */ 4201 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4202 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4203 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4204 PetscValidLogicalCollectiveInt(mat,nis,7); 4205 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4206 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4207 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4208 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4209 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4210 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4211 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4212 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4213 PetscInt mrows,mcols,mnrows,mncols; 4214 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4215 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4216 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4217 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4218 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4219 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4220 } 4221 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4222 PetscValidLogicalCollectiveInt(mat,bs,0); 4223 /* prepare IS for sending if not provided */ 4224 if (!is_sends) { 4225 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4226 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4227 } else { 4228 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4229 is_sends_internal = is_sends; 4230 } 4231 4232 /* get comm */ 4233 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4234 4235 /* compute number of sends */ 4236 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4237 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4238 4239 /* compute number of receives */ 4240 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4241 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4242 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4243 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4244 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4245 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4246 ierr = PetscFree(iflags);CHKERRQ(ierr); 4247 4248 /* restrict comm if requested */ 4249 subcomm = 0; 4250 destroy_mat = PETSC_FALSE; 4251 if (restrict_comm) { 4252 PetscMPIInt color,subcommsize; 4253 4254 color = 0; 4255 if (restrict_full) { 4256 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4257 } else { 4258 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4259 } 4260 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4261 subcommsize = commsize - subcommsize; 4262 /* check if reuse has been requested */ 4263 if (reuse == MAT_REUSE_MATRIX) { 4264 if (*mat_n) { 4265 PetscMPIInt subcommsize2; 4266 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4267 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4268 comm_n = PetscObjectComm((PetscObject)*mat_n); 4269 } else { 4270 comm_n = PETSC_COMM_SELF; 4271 } 4272 } else { /* MAT_INITIAL_MATRIX */ 4273 PetscMPIInt rank; 4274 4275 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4276 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4277 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4278 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4279 comm_n = PetscSubcommChild(subcomm); 4280 } 4281 /* flag to destroy *mat_n if not significative */ 4282 if (color) destroy_mat = PETSC_TRUE; 4283 } else { 4284 comm_n = comm; 4285 } 4286 4287 /* prepare send/receive buffers */ 4288 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4289 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4290 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4291 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4292 if (nis) { 4293 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4294 } 4295 4296 /* Get data from local matrices */ 4297 if (!isdense) { 4298 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4299 /* TODO: See below some guidelines on how to prepare the local buffers */ 4300 /* 4301 send_buffer_vals should contain the raw values of the local matrix 4302 send_buffer_idxs should contain: 4303 - MatType_PRIVATE type 4304 - PetscInt size_of_l2gmap 4305 - PetscInt global_row_indices[size_of_l2gmap] 4306 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4307 */ 4308 } else { 4309 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4310 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4311 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4312 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4313 send_buffer_idxs[1] = i; 4314 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4315 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4316 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4317 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4318 for (i=0;i<n_sends;i++) { 4319 ilengths_vals[is_indices[i]] = len*len; 4320 ilengths_idxs[is_indices[i]] = len+2; 4321 } 4322 } 4323 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4324 /* additional is (if any) */ 4325 if (nis) { 4326 PetscMPIInt psum; 4327 PetscInt j; 4328 for (j=0,psum=0;j<nis;j++) { 4329 PetscInt plen; 4330 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4331 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4332 psum += len+1; /* indices + lenght */ 4333 } 4334 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4335 for (j=0,psum=0;j<nis;j++) { 4336 PetscInt plen; 4337 const PetscInt *is_array_idxs; 4338 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4339 send_buffer_idxs_is[psum] = plen; 4340 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4341 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4342 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4343 psum += plen+1; /* indices + lenght */ 4344 } 4345 for (i=0;i<n_sends;i++) { 4346 ilengths_idxs_is[is_indices[i]] = psum; 4347 } 4348 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4349 } 4350 4351 buf_size_idxs = 0; 4352 buf_size_vals = 0; 4353 buf_size_idxs_is = 0; 4354 for (i=0;i<n_recvs;i++) { 4355 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4356 buf_size_vals += (PetscInt)olengths_vals[i]; 4357 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4358 } 4359 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4360 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4361 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4362 4363 /* get new tags for clean communications */ 4364 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4365 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4366 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4367 4368 /* allocate for requests */ 4369 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4370 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4372 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4373 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4374 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4375 4376 /* communications */ 4377 ptr_idxs = recv_buffer_idxs; 4378 ptr_vals = recv_buffer_vals; 4379 ptr_idxs_is = recv_buffer_idxs_is; 4380 for (i=0;i<n_recvs;i++) { 4381 source_dest = onodes[i]; 4382 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4383 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4384 ptr_idxs += olengths_idxs[i]; 4385 ptr_vals += olengths_vals[i]; 4386 if (nis) { 4387 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4388 ptr_idxs_is += olengths_idxs_is[i]; 4389 } 4390 } 4391 for (i=0;i<n_sends;i++) { 4392 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4393 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4394 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4395 if (nis) { 4396 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4397 } 4398 } 4399 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4400 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4401 4402 /* assemble new l2g map */ 4403 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4404 ptr_idxs = recv_buffer_idxs; 4405 new_local_rows = 0; 4406 for (i=0;i<n_recvs;i++) { 4407 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4408 ptr_idxs += olengths_idxs[i]; 4409 } 4410 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4411 ptr_idxs = recv_buffer_idxs; 4412 new_local_rows = 0; 4413 for (i=0;i<n_recvs;i++) { 4414 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4415 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4416 ptr_idxs += olengths_idxs[i]; 4417 } 4418 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4419 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4420 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4421 4422 /* infer new local matrix type from received local matrices type */ 4423 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4424 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4425 if (n_recvs) { 4426 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4427 ptr_idxs = recv_buffer_idxs; 4428 for (i=0;i<n_recvs;i++) { 4429 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4430 new_local_type_private = MATAIJ_PRIVATE; 4431 break; 4432 } 4433 ptr_idxs += olengths_idxs[i]; 4434 } 4435 switch (new_local_type_private) { 4436 case MATDENSE_PRIVATE: 4437 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4438 new_local_type = MATSEQAIJ; 4439 bs = 1; 4440 } else { /* if I receive only 1 dense matrix */ 4441 new_local_type = MATSEQDENSE; 4442 bs = 1; 4443 } 4444 break; 4445 case MATAIJ_PRIVATE: 4446 new_local_type = MATSEQAIJ; 4447 bs = 1; 4448 break; 4449 case MATBAIJ_PRIVATE: 4450 new_local_type = MATSEQBAIJ; 4451 break; 4452 case MATSBAIJ_PRIVATE: 4453 new_local_type = MATSEQSBAIJ; 4454 break; 4455 default: 4456 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4457 break; 4458 } 4459 } else { /* by default, new_local_type is seqdense */ 4460 new_local_type = MATSEQDENSE; 4461 bs = 1; 4462 } 4463 4464 /* create MATIS object if needed */ 4465 if (reuse == MAT_INITIAL_MATRIX) { 4466 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4467 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4468 } else { 4469 /* it also destroys the local matrices */ 4470 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4471 } 4472 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4473 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4474 4475 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4476 4477 /* Global to local map of received indices */ 4478 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4479 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4480 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4481 4482 /* restore attributes -> type of incoming data and its size */ 4483 buf_size_idxs = 0; 4484 for (i=0;i<n_recvs;i++) { 4485 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4486 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4487 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4488 } 4489 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4490 4491 /* set preallocation */ 4492 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4493 if (!newisdense) { 4494 PetscInt *new_local_nnz=0; 4495 4496 ptr_vals = recv_buffer_vals; 4497 ptr_idxs = recv_buffer_idxs_local; 4498 if (n_recvs) { 4499 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4500 } 4501 for (i=0;i<n_recvs;i++) { 4502 PetscInt j; 4503 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4504 for (j=0;j<*(ptr_idxs+1);j++) { 4505 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4506 } 4507 } else { 4508 /* TODO */ 4509 } 4510 ptr_idxs += olengths_idxs[i]; 4511 } 4512 if (new_local_nnz) { 4513 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4514 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4515 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4516 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4517 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4518 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4519 } else { 4520 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4521 } 4522 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4523 } else { 4524 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4525 } 4526 4527 /* set values */ 4528 ptr_vals = recv_buffer_vals; 4529 ptr_idxs = recv_buffer_idxs_local; 4530 for (i=0;i<n_recvs;i++) { 4531 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4532 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4533 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4534 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4535 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4536 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4537 } else { 4538 /* TODO */ 4539 } 4540 ptr_idxs += olengths_idxs[i]; 4541 ptr_vals += olengths_vals[i]; 4542 } 4543 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4544 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4545 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4546 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4547 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4548 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4549 4550 #if 0 4551 if (!restrict_comm) { /* check */ 4552 Vec lvec,rvec; 4553 PetscReal infty_error; 4554 4555 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4556 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4557 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4558 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4559 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4560 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4561 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4562 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4563 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4564 } 4565 #endif 4566 4567 /* assemble new additional is (if any) */ 4568 if (nis) { 4569 PetscInt **temp_idxs,*count_is,j,psum; 4570 4571 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4572 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4573 ptr_idxs = recv_buffer_idxs_is; 4574 psum = 0; 4575 for (i=0;i<n_recvs;i++) { 4576 for (j=0;j<nis;j++) { 4577 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4578 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4579 psum += plen; 4580 ptr_idxs += plen+1; /* shift pointer to received data */ 4581 } 4582 } 4583 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4584 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4585 for (i=1;i<nis;i++) { 4586 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4587 } 4588 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4589 ptr_idxs = recv_buffer_idxs_is; 4590 for (i=0;i<n_recvs;i++) { 4591 for (j=0;j<nis;j++) { 4592 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4593 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4594 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4595 ptr_idxs += plen+1; /* shift pointer to received data */ 4596 } 4597 } 4598 for (i=0;i<nis;i++) { 4599 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4600 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4601 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4602 } 4603 ierr = PetscFree(count_is);CHKERRQ(ierr); 4604 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4605 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4606 } 4607 /* free workspace */ 4608 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4609 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4610 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4611 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4612 if (isdense) { 4613 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4614 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4615 } else { 4616 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4617 } 4618 if (nis) { 4619 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4620 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4621 } 4622 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4623 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4624 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4625 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4626 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4627 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4628 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4629 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4630 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4631 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4632 ierr = PetscFree(onodes);CHKERRQ(ierr); 4633 if (nis) { 4634 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4635 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4636 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4637 } 4638 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4639 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4640 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4641 for (i=0;i<nis;i++) { 4642 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4643 } 4644 *mat_n = NULL; 4645 } 4646 PetscFunctionReturn(0); 4647 } 4648 4649 /* temporary hack into ksp private data structure */ 4650 #include <petsc/private/kspimpl.h> 4651 4652 #undef __FUNCT__ 4653 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4654 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4655 { 4656 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4657 PC_IS *pcis = (PC_IS*)pc->data; 4658 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4659 MatNullSpace CoarseNullSpace=NULL; 4660 ISLocalToGlobalMapping coarse_islg; 4661 IS coarse_is,*isarray; 4662 PetscInt i,im_active=-1,active_procs=-1; 4663 PetscInt nis,nisdofs,nisneu; 4664 PC pc_temp; 4665 PCType coarse_pc_type; 4666 KSPType coarse_ksp_type; 4667 PetscBool multilevel_requested,multilevel_allowed; 4668 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4669 Mat t_coarse_mat_is; 4670 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4671 PetscMPIInt all_procs; 4672 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4673 PetscBool compute_vecs = PETSC_FALSE; 4674 PetscScalar *array; 4675 PetscErrorCode ierr; 4676 4677 PetscFunctionBegin; 4678 /* Assign global numbering to coarse dofs */ 4679 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4680 PetscInt ocoarse_size; 4681 compute_vecs = PETSC_TRUE; 4682 ocoarse_size = pcbddc->coarse_size; 4683 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4684 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4685 /* see if we can avoid some work */ 4686 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4687 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4688 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4689 PC pc; 4690 PetscBool isbddc; 4691 4692 /* temporary workaround since PCBDDC does not have a reset method so far */ 4693 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4694 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4695 if (isbddc) { 4696 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4697 } 4698 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4699 coarse_reuse = PETSC_FALSE; 4700 } else { /* we can safely reuse already computed coarse matrix */ 4701 coarse_reuse = PETSC_TRUE; 4702 } 4703 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4704 coarse_reuse = PETSC_FALSE; 4705 } 4706 /* reset any subassembling information */ 4707 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4708 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4709 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4710 coarse_reuse = PETSC_TRUE; 4711 } 4712 4713 /* count "active" (i.e. with positive local size) and "void" processes */ 4714 im_active = !!(pcis->n); 4715 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4716 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4717 void_procs = all_procs-active_procs; 4718 csin_type_simple = PETSC_TRUE; 4719 redist = PETSC_FALSE; 4720 if (pcbddc->current_level && void_procs) { 4721 csin_ml = PETSC_TRUE; 4722 ncoarse_ml = void_procs; 4723 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4724 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4725 csin_ds = PETSC_TRUE; 4726 ncoarse_ds = pcbddc->redistribute_coarse; 4727 redist = PETSC_TRUE; 4728 } else { 4729 csin_ds = PETSC_TRUE; 4730 ncoarse_ds = active_procs; 4731 redist = PETSC_TRUE; 4732 } 4733 } else { 4734 csin_ml = PETSC_FALSE; 4735 ncoarse_ml = all_procs; 4736 if (void_procs) { 4737 csin_ds = PETSC_TRUE; 4738 ncoarse_ds = void_procs; 4739 csin_type_simple = PETSC_FALSE; 4740 } else { 4741 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4742 csin_ds = PETSC_TRUE; 4743 ncoarse_ds = pcbddc->redistribute_coarse; 4744 redist = PETSC_TRUE; 4745 } else { 4746 csin_ds = PETSC_FALSE; 4747 ncoarse_ds = all_procs; 4748 } 4749 } 4750 } 4751 4752 /* 4753 test if we can go multilevel: three conditions must be satisfied: 4754 - we have not exceeded the number of levels requested 4755 - we can actually subassemble the active processes 4756 - we can find a suitable number of MPI processes where we can place the subassembled problem 4757 */ 4758 multilevel_allowed = PETSC_FALSE; 4759 multilevel_requested = PETSC_FALSE; 4760 if (pcbddc->current_level < pcbddc->max_levels) { 4761 multilevel_requested = PETSC_TRUE; 4762 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4763 multilevel_allowed = PETSC_FALSE; 4764 } else { 4765 multilevel_allowed = PETSC_TRUE; 4766 } 4767 } 4768 /* determine number of process partecipating to coarse solver */ 4769 if (multilevel_allowed) { 4770 ncoarse = ncoarse_ml; 4771 csin = csin_ml; 4772 redist = PETSC_FALSE; 4773 } else { 4774 ncoarse = ncoarse_ds; 4775 csin = csin_ds; 4776 } 4777 4778 /* creates temporary l2gmap and IS for coarse indexes */ 4779 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4780 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4781 4782 /* creates temporary MATIS object for coarse matrix */ 4783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4784 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4785 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4786 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4787 #if 0 4788 { 4789 PetscViewer viewer; 4790 char filename[256]; 4791 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4792 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4793 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4794 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4795 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4796 } 4797 #endif 4798 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4799 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4800 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4801 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4802 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4803 4804 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4805 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4806 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4807 const PetscInt *idxs; 4808 ISLocalToGlobalMapping tmap; 4809 4810 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4811 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4812 /* allocate space for temporary storage */ 4813 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4814 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4815 /* allocate for IS array */ 4816 nisdofs = pcbddc->n_ISForDofsLocal; 4817 nisneu = !!pcbddc->NeumannBoundariesLocal; 4818 nis = nisdofs + nisneu; 4819 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4820 /* dofs splitting */ 4821 for (i=0;i<nisdofs;i++) { 4822 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4823 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4824 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4825 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4826 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4827 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4828 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4829 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4830 } 4831 /* neumann boundaries */ 4832 if (pcbddc->NeumannBoundariesLocal) { 4833 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4834 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4835 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4836 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4837 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4838 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4839 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4840 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4841 } 4842 /* free memory */ 4843 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4844 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4845 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4846 } else { 4847 nis = 0; 4848 nisdofs = 0; 4849 nisneu = 0; 4850 isarray = NULL; 4851 } 4852 /* destroy no longer needed map */ 4853 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4854 4855 /* restrict on coarse candidates (if needed) */ 4856 coarse_mat_is = NULL; 4857 if (csin) { 4858 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4859 if (redist) { 4860 PetscMPIInt rank; 4861 PetscInt spc,n_spc_p1,dest[1],destsize; 4862 4863 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4864 spc = active_procs/ncoarse; 4865 n_spc_p1 = active_procs%ncoarse; 4866 if (im_active) { 4867 destsize = 1; 4868 if (rank > n_spc_p1*(spc+1)-1) { 4869 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4870 } else { 4871 dest[0] = rank/(spc+1); 4872 } 4873 } else { 4874 destsize = 0; 4875 } 4876 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4877 } else if (csin_type_simple) { 4878 PetscMPIInt rank; 4879 PetscInt issize,isidx; 4880 4881 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4882 if (im_active) { 4883 issize = 1; 4884 isidx = (PetscInt)rank; 4885 } else { 4886 issize = 0; 4887 isidx = -1; 4888 } 4889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4890 } else { /* get a suitable subassembling pattern from MATIS code */ 4891 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4892 } 4893 4894 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4895 if (!redist || ncoarse <= void_procs) { 4896 PetscInt ncoarse_cand,tissize,*nisindices; 4897 PetscInt *coarse_candidates; 4898 const PetscInt* tisindices; 4899 4900 /* get coarse candidates' ranks in pc communicator */ 4901 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4902 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4903 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4904 if (!coarse_candidates[i]) { 4905 coarse_candidates[ncoarse_cand++]=i; 4906 } 4907 } 4908 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4909 4910 4911 if (pcbddc->dbg_flag) { 4912 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4914 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4915 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4916 for (i=0;i<ncoarse_cand;i++) { 4917 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4918 } 4919 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4920 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4921 } 4922 /* shift the pattern on coarse candidates */ 4923 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4924 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4925 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4926 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4927 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4928 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4929 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4930 } 4931 if (pcbddc->dbg_flag) { 4932 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4933 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4934 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4935 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4936 } 4937 } 4938 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4939 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4940 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4941 } else { /* this is the last level, so use just receiving processes in subcomm */ 4942 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4943 } 4944 } else { 4945 if (pcbddc->dbg_flag) { 4946 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4948 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4949 } 4950 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4951 coarse_mat_is = t_coarse_mat_is; 4952 } 4953 4954 /* create local to global scatters for coarse problem */ 4955 if (compute_vecs) { 4956 PetscInt lrows; 4957 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4958 if (coarse_mat_is) { 4959 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4960 } else { 4961 lrows = 0; 4962 } 4963 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4964 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4965 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4966 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4967 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4968 } 4969 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4970 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4971 4972 /* set defaults for coarse KSP and PC */ 4973 if (multilevel_allowed) { 4974 coarse_ksp_type = KSPRICHARDSON; 4975 coarse_pc_type = PCBDDC; 4976 } else { 4977 coarse_ksp_type = KSPPREONLY; 4978 coarse_pc_type = PCREDUNDANT; 4979 } 4980 4981 /* print some info if requested */ 4982 if (pcbddc->dbg_flag) { 4983 if (!multilevel_allowed) { 4984 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4985 if (multilevel_requested) { 4986 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4987 } else if (pcbddc->max_levels) { 4988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4989 } 4990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4991 } 4992 } 4993 4994 /* create the coarse KSP object only once with defaults */ 4995 if (coarse_mat_is) { 4996 MatReuse coarse_mat_reuse; 4997 PetscViewer dbg_viewer = NULL; 4998 if (pcbddc->dbg_flag) { 4999 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5000 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5001 } 5002 if (!pcbddc->coarse_ksp) { 5003 char prefix[256],str_level[16]; 5004 size_t len; 5005 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5006 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5007 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5008 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5009 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5010 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5011 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5012 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5013 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5014 /* prefix */ 5015 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5016 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5017 if (!pcbddc->current_level) { 5018 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5019 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5020 } else { 5021 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5022 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5023 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5024 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5025 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5026 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5027 } 5028 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5029 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5030 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5031 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5032 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5033 /* allow user customization */ 5034 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5035 } 5036 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5037 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5038 if (nisdofs) { 5039 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5040 for (i=0;i<nisdofs;i++) { 5041 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5042 } 5043 } 5044 if (nisneu) { 5045 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5046 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5047 } 5048 5049 /* get some info after set from options */ 5050 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5051 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5052 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5053 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5054 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5055 isbddc = PETSC_FALSE; 5056 } 5057 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5058 if (isredundant) { 5059 KSP inner_ksp; 5060 PC inner_pc; 5061 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5062 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5063 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5064 } 5065 5066 /* assemble coarse matrix */ 5067 if (coarse_reuse) { 5068 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5069 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5070 coarse_mat_reuse = MAT_REUSE_MATRIX; 5071 } else { 5072 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5073 } 5074 if (isbddc || isnn) { 5075 if (pcbddc->coarsening_ratio > 1) { 5076 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5077 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5078 if (pcbddc->dbg_flag) { 5079 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5080 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5081 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5082 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5083 } 5084 } 5085 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5086 } else { 5087 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5088 coarse_mat = coarse_mat_is; 5089 } 5090 } else { 5091 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5092 } 5093 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5094 5095 /* propagate symmetry info of coarse matrix */ 5096 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5097 if (pc->pmat->symmetric_set) { 5098 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5099 } 5100 if (pc->pmat->hermitian_set) { 5101 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5102 } 5103 if (pc->pmat->spd_set) { 5104 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5105 } 5106 /* set operators */ 5107 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5108 if (pcbddc->dbg_flag) { 5109 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5110 } 5111 } else { /* processes non partecipating to coarse solver (if any) */ 5112 coarse_mat = 0; 5113 } 5114 ierr = PetscFree(isarray);CHKERRQ(ierr); 5115 #if 0 5116 { 5117 PetscViewer viewer; 5118 char filename[256]; 5119 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5120 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5121 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5122 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5123 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5124 } 5125 #endif 5126 5127 /* Compute coarse null space (special handling by BDDC only) */ 5128 #if 0 5129 if (pcbddc->NullSpace) { 5130 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5131 } 5132 #endif 5133 /* hack */ 5134 if (pcbddc->coarse_ksp) { 5135 Vec crhs,csol; 5136 5137 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5138 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5139 if (!csol) { 5140 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5141 } 5142 if (!crhs) { 5143 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5144 } 5145 } 5146 5147 /* compute null space for coarse solver if the benign trick has been requested */ 5148 if (pcbddc->benign_null) { 5149 5150 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5151 for (i=0;i<pcbddc->benign_n;i++) { 5152 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5153 } 5154 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5155 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5156 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5157 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5158 if (coarse_mat) { 5159 Vec nullv; 5160 PetscScalar *array,*array2; 5161 PetscInt nl; 5162 5163 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5164 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5165 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5166 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5167 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5168 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5169 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5170 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5171 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5172 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5173 } 5174 } 5175 5176 if (pcbddc->coarse_ksp) { 5177 PetscBool ispreonly; 5178 5179 if (CoarseNullSpace) { 5180 PetscBool isnull; 5181 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5182 if (isnull) { 5183 if (isbddc) { 5184 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5185 } else { 5186 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5187 } 5188 } else { 5189 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5190 } 5191 } 5192 /* setup coarse ksp */ 5193 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5194 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5195 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5196 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5197 KSP check_ksp; 5198 KSPType check_ksp_type; 5199 PC check_pc; 5200 Vec check_vec,coarse_vec; 5201 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5202 PetscInt its; 5203 PetscBool compute_eigs; 5204 PetscReal *eigs_r,*eigs_c; 5205 PetscInt neigs; 5206 const char *prefix; 5207 5208 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5209 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5210 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5211 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5212 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5213 if (ispreonly) { 5214 check_ksp_type = KSPPREONLY; 5215 compute_eigs = PETSC_FALSE; 5216 } else { 5217 check_ksp_type = KSPGMRES; 5218 compute_eigs = PETSC_TRUE; 5219 } 5220 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5221 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5222 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5223 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5224 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5225 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5226 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5227 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5228 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5229 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5230 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5231 /* create random vec */ 5232 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5233 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5234 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5235 if (CoarseNullSpace) { 5236 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5237 } 5238 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5239 /* solve coarse problem */ 5240 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5241 if (CoarseNullSpace) { 5242 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5243 } 5244 /* set eigenvalue estimation if preonly has not been requested */ 5245 if (compute_eigs) { 5246 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5247 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5248 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5249 lambda_max = eigs_r[neigs-1]; 5250 lambda_min = eigs_r[0]; 5251 if (pcbddc->use_coarse_estimates) { 5252 if (lambda_max>lambda_min) { 5253 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5254 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5255 } 5256 } 5257 } 5258 5259 /* check coarse problem residual error */ 5260 if (pcbddc->dbg_flag) { 5261 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5262 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5263 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5264 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5265 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5266 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5267 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5268 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5269 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5270 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5271 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5272 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5273 if (CoarseNullSpace) { 5274 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5275 } 5276 if (compute_eigs) { 5277 PetscReal lambda_max_s,lambda_min_s; 5278 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5279 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5280 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5281 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5282 for (i=0;i<neigs;i++) { 5283 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5284 } 5285 } 5286 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5287 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5288 } 5289 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5290 if (compute_eigs) { 5291 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5292 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5293 } 5294 } 5295 } 5296 /* print additional info */ 5297 if (pcbddc->dbg_flag) { 5298 /* waits until all processes reaches this point */ 5299 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5300 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5301 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5302 } 5303 5304 /* free memory */ 5305 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5306 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5307 PetscFunctionReturn(0); 5308 } 5309 5310 #undef __FUNCT__ 5311 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5312 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5313 { 5314 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5315 PC_IS* pcis = (PC_IS*)pc->data; 5316 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5317 IS subset,subset_mult,subset_n; 5318 PetscInt local_size,coarse_size=0; 5319 PetscInt *local_primal_indices=NULL; 5320 const PetscInt *t_local_primal_indices; 5321 PetscErrorCode ierr; 5322 5323 PetscFunctionBegin; 5324 /* Compute global number of coarse dofs */ 5325 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5326 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5327 } 5328 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5329 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5330 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5331 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5332 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5333 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5334 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5335 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5336 if (local_size != pcbddc->local_primal_size) { 5337 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5338 } 5339 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5340 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5341 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5342 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5343 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5344 5345 /* check numbering */ 5346 if (pcbddc->dbg_flag) { 5347 PetscScalar coarsesum,*array,*array2; 5348 PetscInt i; 5349 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5350 5351 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5352 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5353 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5354 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5355 /* counter */ 5356 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5357 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5358 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5359 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5360 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5361 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5362 5363 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5364 for (i=0;i<pcbddc->local_primal_size;i++) { 5365 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5366 } 5367 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5368 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5369 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5370 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5371 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5372 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5373 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5374 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5375 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5376 for (i=0;i<pcis->n;i++) { 5377 if (array[i] != 0.0 && array[i] != array2[i]) { 5378 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5379 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5380 set_error = PETSC_TRUE; 5381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5382 } 5383 } 5384 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5385 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5386 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5387 for (i=0;i<pcis->n;i++) { 5388 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5389 } 5390 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5391 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5392 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5393 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5394 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5396 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5397 PetscInt *gidxs; 5398 5399 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5400 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5401 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5402 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5404 for (i=0;i<pcbddc->local_primal_size;i++) { 5405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5406 } 5407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5408 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5409 } 5410 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5411 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5412 } 5413 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5414 /* get back data */ 5415 *coarse_size_n = coarse_size; 5416 *local_primal_indices_n = local_primal_indices; 5417 PetscFunctionReturn(0); 5418 } 5419 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "PCBDDCGlobalToLocal" 5422 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5423 { 5424 IS localis_t; 5425 PetscInt i,lsize,*idxs,n; 5426 PetscScalar *vals; 5427 PetscErrorCode ierr; 5428 5429 PetscFunctionBegin; 5430 /* get indices in local ordering exploiting local to global map */ 5431 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5432 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5433 for (i=0;i<lsize;i++) vals[i] = 1.0; 5434 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5435 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5436 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5437 if (idxs) { /* multilevel guard */ 5438 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5439 } 5440 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5441 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5442 ierr = PetscFree(vals);CHKERRQ(ierr); 5443 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5444 /* now compute set in local ordering */ 5445 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5446 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5447 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5448 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5449 for (i=0,lsize=0;i<n;i++) { 5450 if (PetscRealPart(vals[i]) > 0.5) { 5451 lsize++; 5452 } 5453 } 5454 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5455 for (i=0,lsize=0;i<n;i++) { 5456 if (PetscRealPart(vals[i]) > 0.5) { 5457 idxs[lsize++] = i; 5458 } 5459 } 5460 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5461 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5462 *localis = localis_t; 5463 PetscFunctionReturn(0); 5464 } 5465 5466 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5467 #undef __FUNCT__ 5468 #define __FUNCT__ "PCBDDCMatMult_Private" 5469 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5470 { 5471 PCBDDCChange_ctx change_ctx; 5472 PetscErrorCode ierr; 5473 5474 PetscFunctionBegin; 5475 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5476 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5477 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5478 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5479 PetscFunctionReturn(0); 5480 } 5481 5482 #undef __FUNCT__ 5483 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5484 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5485 { 5486 PCBDDCChange_ctx change_ctx; 5487 PetscErrorCode ierr; 5488 5489 PetscFunctionBegin; 5490 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5491 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5492 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5493 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5494 PetscFunctionReturn(0); 5495 } 5496 5497 #undef __FUNCT__ 5498 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5499 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5500 { 5501 PC_IS *pcis=(PC_IS*)pc->data; 5502 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5503 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5504 Mat S_j; 5505 PetscInt *used_xadj,*used_adjncy; 5506 PetscBool free_used_adj; 5507 PetscErrorCode ierr; 5508 5509 PetscFunctionBegin; 5510 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5511 free_used_adj = PETSC_FALSE; 5512 if (pcbddc->sub_schurs_layers == -1) { 5513 used_xadj = NULL; 5514 used_adjncy = NULL; 5515 } else { 5516 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5517 used_xadj = pcbddc->mat_graph->xadj; 5518 used_adjncy = pcbddc->mat_graph->adjncy; 5519 } else if (pcbddc->computed_rowadj) { 5520 used_xadj = pcbddc->mat_graph->xadj; 5521 used_adjncy = pcbddc->mat_graph->adjncy; 5522 } else { 5523 PetscBool flg_row=PETSC_FALSE; 5524 const PetscInt *xadj,*adjncy; 5525 PetscInt nvtxs; 5526 5527 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5528 if (flg_row) { 5529 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5530 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5531 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5532 free_used_adj = PETSC_TRUE; 5533 } else { 5534 pcbddc->sub_schurs_layers = -1; 5535 used_xadj = NULL; 5536 used_adjncy = NULL; 5537 } 5538 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5539 } 5540 } 5541 5542 /* setup sub_schurs data */ 5543 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5544 if (!sub_schurs->use_mumps) { 5545 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5546 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5547 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5548 } else { 5549 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5550 PetscBool isseqaij; 5551 if (!pcbddc->use_vertices && reuse_solvers) { 5552 PetscInt n_vertices; 5553 5554 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5555 reuse_solvers = (PetscBool)!n_vertices; 5556 } 5557 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5558 if (!isseqaij) { 5559 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5560 if (matis->A == pcbddc->local_mat) { 5561 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5562 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5563 } else { 5564 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5565 } 5566 } 5567 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5568 } 5569 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5570 5571 /* free adjacency */ 5572 if (free_used_adj) { 5573 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5574 } 5575 PetscFunctionReturn(0); 5576 } 5577 5578 #undef __FUNCT__ 5579 #define __FUNCT__ "PCBDDCInitSubSchurs" 5580 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5581 { 5582 PC_IS *pcis=(PC_IS*)pc->data; 5583 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5584 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5585 PCBDDCGraph graph; 5586 PetscErrorCode ierr; 5587 5588 PetscFunctionBegin; 5589 /* attach interface graph for determining subsets */ 5590 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5591 IS verticesIS,verticescomm; 5592 PetscInt vsize,*idxs; 5593 5594 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5595 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5596 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5597 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5598 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5599 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5600 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5601 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5602 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5603 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5604 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5605 /* 5606 if (pcbddc->dbg_flag) { 5607 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5608 } 5609 */ 5610 } else { 5611 graph = pcbddc->mat_graph; 5612 } 5613 5614 /* sub_schurs init */ 5615 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5616 5617 /* free graph struct */ 5618 if (pcbddc->sub_schurs_rebuild) { 5619 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5620 } 5621 PetscFunctionReturn(0); 5622 } 5623 5624 #undef __FUNCT__ 5625 #define __FUNCT__ "PCBDDCCheckOperator" 5626 PetscErrorCode PCBDDCCheckOperator(PC pc) 5627 { 5628 PC_IS *pcis=(PC_IS*)pc->data; 5629 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5630 PetscErrorCode ierr; 5631 5632 PetscFunctionBegin; 5633 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5634 IS zerodiag = NULL; 5635 Mat S_j,B0_B=NULL; 5636 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5637 PetscScalar *p0_check,*array,*array2; 5638 PetscReal norm; 5639 PetscInt i; 5640 5641 /* B0 and B0_B */ 5642 if (zerodiag) { 5643 IS dummy; 5644 5645 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5646 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5647 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5648 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5649 } 5650 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5651 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5652 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5653 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5654 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5655 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5656 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5657 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5658 /* S_j */ 5659 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5660 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5661 5662 /* mimic vector in \widetilde{W}_\Gamma */ 5663 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5664 /* continuous in primal space */ 5665 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5666 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5667 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5668 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5669 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5670 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5671 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5672 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5673 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5674 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5675 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5676 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5677 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5678 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5679 5680 /* assemble rhs for coarse problem */ 5681 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5682 /* local with Schur */ 5683 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5684 if (zerodiag) { 5685 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5686 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5687 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5688 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5689 } 5690 /* sum on primal nodes the local contributions */ 5691 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5692 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5693 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5694 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5695 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5696 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5697 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5698 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5699 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5700 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5701 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5702 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5703 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5704 /* scale primal nodes (BDDC sums contibutions) */ 5705 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5706 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5707 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5708 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5709 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5710 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5711 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5712 /* global: \widetilde{B0}_B w_\Gamma */ 5713 if (zerodiag) { 5714 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5715 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5716 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5717 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5718 } 5719 /* BDDC */ 5720 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5721 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5722 5723 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5724 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5725 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5726 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5727 for (i=0;i<pcbddc->benign_n;i++) { 5728 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5729 } 5730 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5731 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5732 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5733 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5734 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5735 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5736 } 5737 PetscFunctionReturn(0); 5738 } 5739