1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 147 /* partial clean up */ 148 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 149 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 150 if (A != B) { 151 ierr = MatDestroy(&B);CHKERRQ(ierr); 152 } 153 154 /* get back data */ 155 if (ncc) *ncc = graph->ncc; 156 if (cc) { 157 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 158 for (i=0;i<graph->ncc;i++) { 159 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 160 } 161 *cc = cc_n; 162 } 163 /* clean up graph */ 164 graph->xadj = 0; 165 graph->adjncy = 0; 166 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "PCBDDCBenignCheck" 172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 173 { 174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 175 PC_IS* pcis = (PC_IS*)(pc->data); 176 IS dirIS = NULL; 177 PetscInt i; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 182 if (zerodiag) { 183 Mat A; 184 Vec vec3_N; 185 PetscScalar *vals; 186 const PetscInt *idxs; 187 PetscInt nz; 188 189 /* p0 */ 190 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 191 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 193 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 194 for (i=0;i<nz;i++) vals[i] = 1.; 195 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 198 /* v_I */ 199 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 200 for (i=0;i<nz;i++) vals[i] = 0.; 201 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 202 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 203 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 204 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 205 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 206 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 207 if (dirIS) { 208 PetscInt n; 209 210 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 211 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 212 for (i=0;i<n;i++) vals[i] = 0.; 213 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 214 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 215 } 216 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 218 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 219 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 220 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 221 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 222 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 223 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 224 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 225 } 226 ierr = PetscFree(vals);CHKERRQ(ierr); 227 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 228 } 229 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 230 231 /* check PCBDDCBenignGetOrSetP0 */ 232 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 235 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 236 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 237 for (i=0;i<pcbddc->benign_n;i++) { 238 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 239 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 240 } 241 } 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 248 { 249 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 250 IS pressures,zerodiag,*zerodiag_subs; 251 PetscInt nz,n; 252 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 258 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 259 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 260 pcbddc->benign_original_mat = pcbddc->local_mat; 261 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 262 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 263 Checks if all the pressure dofs in each subdomain have a zero diagonal 264 If not, a change of basis on pressures is not needed 265 since the local Schur complements are SPD 266 */ 267 has_null_pressures = PETSC_TRUE; 268 have_null = PETSC_TRUE; 269 if (pcbddc->n_ISForDofsLocal) { 270 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 271 272 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 273 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 274 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 275 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 276 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 277 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 278 if (!sorted) { 279 ierr = ISSort(pressures);CHKERRQ(ierr); 280 } 281 } else { 282 pressures = NULL; 283 } 284 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 285 /* TODO: add check for shared dofs and raise error */ 286 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 287 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 288 if (!sorted) { 289 ierr = ISSort(zerodiag);CHKERRQ(ierr); 290 } 291 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 292 if (!nz) { 293 if (n) have_null = PETSC_FALSE; 294 has_null_pressures = PETSC_FALSE; 295 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 296 } 297 recompute_zerodiag = PETSC_FALSE; 298 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 299 zerodiag_subs = NULL; 300 pcbddc->benign_n = 0; 301 if (has_null_pressures) { 302 IS *subs; 303 PetscInt nsubs,i; 304 305 subs = pcbddc->local_subs; 306 nsubs = pcbddc->n_local_subs; 307 if (nsubs > 1) { 308 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 309 for (i=0;i<nsubs;i++) { 310 ISLocalToGlobalMapping l2g; 311 IS t_zerodiag_subs; 312 PetscInt nl; 313 314 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 315 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 316 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 317 if (nl) { 318 PetscBool valid = PETSC_TRUE; 319 320 if (pressures) { 321 IS t_pressure_subs; 322 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 323 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 324 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 325 } 326 if (valid) { 327 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 328 pcbddc->benign_n++; 329 } else { 330 recompute_zerodiag = PETSC_TRUE; 331 } 332 } 333 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 334 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 335 } 336 } else { /* there's just one subdomain (or zero if they have not been detected */ 337 PetscBool valid = PETSC_TRUE; 338 if (pressures) { 339 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 340 } 341 if (valid) { 342 pcbddc->benign_n = 1; 343 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 344 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 345 zerodiag_subs[0] = zerodiag; 346 } 347 } 348 } 349 350 if (!pcbddc->benign_n) { 351 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 352 recompute_zerodiag = PETSC_FALSE; 353 has_null_pressures = PETSC_FALSE; 354 have_null = PETSC_FALSE; 355 } 356 357 /* final check for null pressures */ 358 if (zerodiag && pressures) { 359 PetscInt nz,np; 360 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 361 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 362 if (nz != np) have_null = PETSC_FALSE; 363 } 364 365 if (recompute_zerodiag) { 366 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 367 if (pcbddc->benign_n == 1) { 368 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 369 zerodiag = zerodiag_subs[0]; 370 } else { 371 PetscInt i,nzn,*new_idxs; 372 373 nzn = 0; 374 for (i=0;i<pcbddc->benign_n;i++) { 375 PetscInt ns; 376 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 377 nzn += ns; 378 } 379 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 380 nzn = 0; 381 for (i=0;i<pcbddc->benign_n;i++) { 382 PetscInt ns,*idxs; 383 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 384 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 386 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 387 nzn += ns; 388 } 389 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 390 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 391 } 392 have_null = PETSC_FALSE; 393 } 394 395 if (has_null_pressures) { 396 IS zerodiagc; 397 const PetscInt *idxs,*idxsc; 398 PetscInt i,s,*nnz; 399 Mat M; 400 401 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 402 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 403 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 404 /* local change of basis for pressures */ 405 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 406 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 407 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 408 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 409 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 410 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 411 for (i=0;i<pcbddc->benign_n;i++) { 412 PetscInt nzs,j; 413 414 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 415 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 417 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 418 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 419 } 420 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 421 ierr = PetscFree(nnz);CHKERRQ(ierr); 422 /* set identity on velocities */ 423 for (i=0;i<n-nz;i++) { 424 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 425 } 426 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 427 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 429 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 430 /* set change on pressures */ 431 for (s=0;s<pcbddc->benign_n;s++) { 432 PetscScalar *array; 433 PetscInt nzs; 434 435 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 436 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 437 for (i=0;i<nzs-1;i++) { 438 PetscScalar vals[2]; 439 PetscInt cols[2]; 440 441 cols[0] = idxs[i]; 442 cols[1] = idxs[nzs-1]; 443 vals[0] = 1.; 444 vals[1] = 1.; 445 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 446 } 447 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 448 for (i=0;i<nzs-1;i++) array[i] = -1.; 449 array[nzs-1] = 1.; 450 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 451 /* store local idxs for p0 */ 452 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 453 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 454 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 455 ierr = PetscFree(array);CHKERRQ(ierr); 456 } 457 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 /* TODO: need optimization? */ 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 462 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 463 ierr = MatDestroy(&M);CHKERRQ(ierr); 464 /* store global idxs for p0 */ 465 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 466 } 467 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 468 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 469 470 /* determines if the coarse solver will be singular or not */ 471 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 472 *zerodiaglocal = zerodiag; 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 478 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 479 { 480 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 if (!pcbddc->benign_sf) { 485 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 486 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 487 } 488 if (get) { /* use SF to get values */ 489 PetscScalar *array; 490 491 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 493 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 495 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 496 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 499 } 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 505 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 506 { 507 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 508 PetscErrorCode ierr; 509 510 PetscFunctionBegin; 511 /* TODO: add error checking 512 - avoid nested pop (or push) calls. 513 - cannot push before pop. 514 - cannot call this if pcbddc->local_mat is NULL 515 */ 516 if (!pcbddc->benign_n) { 517 PetscFunctionReturn(0); 518 } 519 if (pop) { 520 IS is_p0; 521 MatReuse reuse; 522 523 /* extract B_0 */ 524 reuse = MAT_INITIAL_MATRIX; 525 if (pcbddc->benign_B0) { 526 reuse = MAT_REUSE_MATRIX; 527 } 528 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 529 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 530 /* remove rows and cols from local problem */ 531 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 532 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 533 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 534 } else { /* push */ 535 PetscInt i; 536 537 for (i=0;i<pcbddc->benign_n;i++) { 538 PetscScalar *B0_vals; 539 PetscInt *B0_cols,B0_ncol; 540 541 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 542 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 544 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 545 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 546 } 547 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "PCBDDCAdaptiveSelection" 555 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 556 { 557 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 558 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 559 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 560 PetscBLASInt *B_iwork,*B_ifail; 561 PetscScalar *work,lwork; 562 PetscScalar *St,*S,*eigv; 563 PetscScalar *Sarray,*Starray; 564 PetscReal *eigs,thresh; 565 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 566 PetscBool allocated_S_St; 567 #if defined(PETSC_USE_COMPLEX) 568 PetscReal *rwork; 569 #endif 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 if (!sub_schurs->use_mumps) { 574 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 575 } 576 577 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 578 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 579 } 580 581 if (pcbddc->dbg_flag) { 582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 585 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 590 } 591 592 /* max size of subsets */ 593 mss = 0; 594 for (i=0;i<sub_schurs->n_subs;i++) { 595 PetscInt subset_size; 596 597 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 598 mss = PetscMax(mss,subset_size); 599 } 600 601 /* min/max and threshold */ 602 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 603 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 604 nmax = PetscMax(nmin,nmax); 605 allocated_S_St = PETSC_FALSE; 606 if (nmin) { 607 allocated_S_St = PETSC_TRUE; 608 } 609 610 /* allocate lapack workspace */ 611 cum = cum2 = 0; 612 maxneigs = 0; 613 for (i=0;i<sub_schurs->n_subs;i++) { 614 PetscInt n,subset_size; 615 616 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 617 n = PetscMin(subset_size,nmax); 618 cum += subset_size; 619 cum2 += subset_size*n; 620 maxneigs = PetscMax(maxneigs,n); 621 } 622 if (mss) { 623 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 624 PetscBLASInt B_itype = 1; 625 PetscBLASInt B_N = mss; 626 PetscReal zero = 0.0; 627 PetscReal eps = 0.0; /* dlamch? */ 628 629 B_lwork = -1; 630 S = NULL; 631 St = NULL; 632 eigs = NULL; 633 eigv = NULL; 634 B_iwork = NULL; 635 B_ifail = NULL; 636 #if defined(PETSC_USE_COMPLEX) 637 rwork = NULL; 638 #endif 639 thresh = 1.0; 640 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 641 #if defined(PETSC_USE_COMPLEX) 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 643 #else 644 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 645 #endif 646 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 647 ierr = PetscFPTrapPop();CHKERRQ(ierr); 648 } else { 649 /* TODO */ 650 } 651 } else { 652 lwork = 0; 653 } 654 655 nv = 0; 656 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 657 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 658 } 659 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 660 if (allocated_S_St) { 661 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 662 } 663 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 664 #if defined(PETSC_USE_COMPLEX) 665 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 666 #endif 667 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 668 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 669 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 670 nv+cum,&pcbddc->adaptive_constraints_idxs, 671 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 672 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 673 674 maxneigs = 0; 675 cum = cum2 = cumarray = 0; 676 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 677 pcbddc->adaptive_constraints_data_ptr[0] = 0; 678 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 679 const PetscInt *idxs; 680 681 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 682 for (cum=0;cum<nv;cum++) { 683 pcbddc->adaptive_constraints_n[cum] = 1; 684 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 685 pcbddc->adaptive_constraints_data[cum] = 1.0; 686 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 687 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 688 } 689 cum2 = cum; 690 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 691 } 692 693 if (mss) { /* multilevel */ 694 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 696 } 697 698 for (i=0;i<sub_schurs->n_subs;i++) { 699 700 const PetscInt *idxs; 701 PetscReal infty = PETSC_MAX_REAL; 702 PetscInt j,subset_size,eigs_start = 0; 703 PetscBLASInt B_N; 704 PetscBool same_data = PETSC_FALSE; 705 706 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 707 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 708 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 709 if (sub_schurs->is_hermitian) { 710 PetscInt j,k; 711 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 712 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 713 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 714 } 715 for (j=0;j<subset_size;j++) { 716 for (k=j;k<subset_size;k++) { 717 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 718 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 719 } 720 } 721 } else { 722 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 723 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 724 } 725 } else { 726 S = Sarray + cumarray; 727 St = Starray + cumarray; 728 } 729 730 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 731 /* see if we can save some work */ 732 if (sub_schurs->n_subs == 1) { 733 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 734 } 735 736 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 737 B_neigs = 0; 738 } else { 739 /* Threshold: this is an heuristic for edges */ 740 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 741 if (nmin) { 742 Mat SM,StM; 743 PetscInt j,k,nccs,nccst; 744 745 for (j=0;j<subset_size;j++) { 746 for (k=j;k<subset_size;k++) { 747 S [k*subset_size+j] = S [j*subset_size+k]; 748 St[k*subset_size+j] = St[j*subset_size+k]; 749 } 750 } 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr); 752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr); 753 ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr); 754 ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr); 755 if (nccs != 1 || nccst != 1) { 756 PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size); 757 } 758 ierr = MatDestroy(&SM);CHKERRQ(ierr); 759 ierr = MatDestroy(&StM);CHKERRQ(ierr); 760 } 761 762 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 763 PetscBLASInt B_itype = 1; 764 PetscBLASInt B_IL, B_IU; 765 PetscReal eps = -1.0; /* dlamch? */ 766 PetscInt nmin_s; 767 768 if (pcbddc->dbg_flag) { 769 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 770 } 771 772 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 773 if (thresh > 1.+PETSC_SMALL) { 774 775 /* ask for eigenvalues larger than thresh */ 776 #if defined(PETSC_USE_COMPLEX) 777 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 778 #else 779 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 780 #endif 781 } else { 782 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 783 B_IL = 1; 784 #if defined(PETSC_USE_COMPLEX) 785 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 786 #else 787 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 788 #endif 789 } 790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 791 if (B_ierr) { 792 if (B_ierr < 0 ) { 793 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 794 } else if (B_ierr <= B_N) { 795 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 796 } else { 797 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 798 } 799 } 800 801 if (B_neigs > nmax) { 802 if (pcbddc->dbg_flag) { 803 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 804 } 805 eigs_start = B_neigs -nmax; 806 B_neigs = nmax; 807 } 808 809 nmin_s = PetscMin(nmin,B_N); 810 if (B_neigs < nmin_s) { 811 PetscBLASInt B_neigs2; 812 813 B_IU = B_N - B_neigs; 814 B_IL = B_N - nmin_s + 1; 815 if (pcbddc->dbg_flag) { 816 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 817 } 818 if (sub_schurs->is_hermitian) { 819 PetscInt j; 820 for (j=0;j<subset_size;j++) { 821 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 822 } 823 for (j=0;j<subset_size;j++) { 824 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 825 } 826 } else { 827 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 828 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 829 } 830 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 831 #if defined(PETSC_USE_COMPLEX) 832 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 833 #else 834 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 835 #endif 836 ierr = PetscFPTrapPop();CHKERRQ(ierr); 837 B_neigs += B_neigs2; 838 } 839 if (B_ierr) { 840 if (B_ierr < 0 ) { 841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 842 } else if (B_ierr <= B_N) { 843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 844 } else { 845 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 846 } 847 } 848 if (pcbddc->dbg_flag) { 849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 850 for (j=0;j<B_neigs;j++) { 851 if (eigs[j] == 0.0) { 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 853 } else { 854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 855 } 856 } 857 } 858 } else { 859 /* TODO */ 860 } 861 } 862 maxneigs = PetscMax(B_neigs,maxneigs); 863 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 864 if (B_neigs) { 865 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 866 867 if (pcbddc->dbg_flag > 1) { 868 PetscInt ii; 869 for (ii=0;ii<B_neigs;ii++) { 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 871 for (j=0;j<B_N;j++) { 872 #if defined(PETSC_USE_COMPLEX) 873 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 874 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 876 #else 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 878 #endif 879 } 880 } 881 } 882 #if 0 883 for (j=0;j<B_neigs;j++) { 884 PetscBLASInt Blas_N,Blas_one = 1.0; 885 PetscScalar norm; 886 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 887 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 888 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 889 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 890 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 891 } else { 892 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 893 } 894 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 895 } 896 #endif 897 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 898 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 899 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 900 cum++; 901 } 902 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 903 /* shift for next computation */ 904 cumarray += subset_size*subset_size; 905 } 906 if (pcbddc->dbg_flag) { 907 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 908 } 909 910 if (mss) { 911 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 912 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 913 /* destroy matrices (junk) */ 914 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 915 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 916 } 917 if (allocated_S_St) { 918 ierr = PetscFree2(S,St);CHKERRQ(ierr); 919 } 920 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 921 #if defined(PETSC_USE_COMPLEX) 922 ierr = PetscFree(rwork);CHKERRQ(ierr); 923 #endif 924 if (pcbddc->dbg_flag) { 925 PetscInt maxneigs_r; 926 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 927 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 928 } 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "PCBDDCSetUpSolvers" 934 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 935 { 936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 937 PetscScalar *coarse_submat_vals; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 /* Setup local scatters R_to_B and (optionally) R_to_D */ 942 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 943 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 944 945 /* Setup local neumann solver ksp_R */ 946 /* PCBDDCSetUpLocalScatters should be called first! */ 947 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 948 949 /* Change global null space passed in by the user if change of basis has been requested */ 950 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 951 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 952 } 953 954 /* 955 Setup local correction and local part of coarse basis. 956 Gives back the dense local part of the coarse matrix in column major ordering 957 */ 958 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 959 960 /* Compute total number of coarse nodes and setup coarse solver */ 961 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 962 963 /* free */ 964 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "PCBDDCResetCustomization" 970 PetscErrorCode PCBDDCResetCustomization(PC pc) 971 { 972 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 977 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 978 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 979 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 980 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 981 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 982 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 983 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 984 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 985 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 986 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 987 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 988 PetscFunctionReturn(0); 989 } 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "PCBDDCResetTopography" 993 PetscErrorCode PCBDDCResetTopography(PC pc) 994 { 995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 996 PetscInt i; 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1001 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1002 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1003 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1004 for (i=0;i<pcbddc->n_local_subs;i++) { 1005 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1008 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "PCBDDCResetSolvers" 1014 PetscErrorCode PCBDDCResetSolvers(PC pc) 1015 { 1016 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1017 PetscScalar *array; 1018 PetscErrorCode ierr; 1019 1020 PetscFunctionBegin; 1021 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1022 if (pcbddc->coarse_phi_B) { 1023 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1024 ierr = PetscFree(array);CHKERRQ(ierr); 1025 } 1026 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1028 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1029 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1030 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1031 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1032 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1033 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1034 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1035 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1036 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1037 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1038 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1039 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1040 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1041 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1042 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1043 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1044 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1045 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1046 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1047 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1048 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1049 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1050 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1051 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1052 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1053 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1054 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1055 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 #undef __FUNCT__ 1060 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1061 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1062 { 1063 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1064 PC_IS *pcis = (PC_IS*)pc->data; 1065 VecType impVecType; 1066 PetscInt n_constraints,n_R,old_size; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 if (!pcbddc->ConstraintMatrix) { 1071 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1072 } 1073 /* get sizes */ 1074 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1075 n_R = pcis->n - pcbddc->n_vertices; 1076 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1077 /* local work vectors (try to avoid unneeded work)*/ 1078 /* R nodes */ 1079 old_size = -1; 1080 if (pcbddc->vec1_R) { 1081 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1082 } 1083 if (n_R != old_size) { 1084 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1085 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1086 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1087 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1088 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1089 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1090 } 1091 /* local primal dofs */ 1092 old_size = -1; 1093 if (pcbddc->vec1_P) { 1094 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1095 } 1096 if (pcbddc->local_primal_size != old_size) { 1097 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1098 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1099 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1100 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1101 } 1102 /* local explicit constraints */ 1103 old_size = -1; 1104 if (pcbddc->vec1_C) { 1105 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1106 } 1107 if (n_constraints && n_constraints != old_size) { 1108 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1109 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1110 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1111 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "PCBDDCSetUpCorrection" 1118 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1119 { 1120 PetscErrorCode ierr; 1121 /* pointers to pcis and pcbddc */ 1122 PC_IS* pcis = (PC_IS*)pc->data; 1123 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1124 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1125 /* submatrices of local problem */ 1126 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1127 /* submatrices of benign trick */ 1128 Mat B0_V = NULL; 1129 /* submatrices of local coarse problem */ 1130 Mat S_VV,S_CV,S_VC,S_CC; 1131 /* working matrices */ 1132 Mat C_CR; 1133 /* additional working stuff */ 1134 PC pc_R; 1135 Mat F; 1136 PetscBool isLU,isCHOL,isILU; 1137 1138 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1139 PetscScalar *work; 1140 PetscInt *idx_V_B; 1141 PetscInt n,n_vertices,n_constraints,*p0_lidx_I; 1142 PetscInt i,n_R,n_D,n_B; 1143 PetscBool unsymmetric_check; 1144 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 1145 MatType impMatType; 1146 /* some shortcuts to scalars */ 1147 PetscScalar one=1.0,m_one=-1.0; 1148 1149 PetscFunctionBegin; 1150 n_vertices = pcbddc->n_vertices; 1151 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1152 /* Set Non-overlapping dimensions */ 1153 n_B = pcis->n_B; 1154 n_D = pcis->n - n_B; 1155 n_R = pcis->n - n_vertices; 1156 1157 /* Set types for local objects needed by BDDC precondtioner */ 1158 impMatType = MATSEQDENSE; 1159 1160 /* vertices in boundary numbering */ 1161 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1162 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1163 if (i != n_vertices) { 1164 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1165 } 1166 1167 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1168 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1170 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1171 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1172 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1173 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1174 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1175 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1176 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1177 1178 unsymmetric_check = PETSC_FALSE; 1179 /* allocate workspace */ 1180 n = 0; 1181 if (n_constraints) { 1182 n += n_R*n_constraints; 1183 } 1184 if (n_vertices) { 1185 n = PetscMax(2*n_R*n_vertices,n); 1186 n = PetscMax((n_R+n_B)*n_vertices,n); 1187 } 1188 if (!pcbddc->symmetric_primal) { 1189 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 1190 unsymmetric_check = PETSC_TRUE; 1191 } 1192 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1193 1194 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1195 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1196 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1197 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1198 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1199 if (isLU || isILU || isCHOL) { 1200 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1201 } else if (sub_schurs->reuse_mumps) { 1202 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1203 MatFactorType type; 1204 1205 F = reuse_mumps->F; 1206 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1207 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1208 } else { 1209 F = NULL; 1210 } 1211 1212 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1213 if (n_constraints) { 1214 Mat M1,M2,M3; 1215 Mat auxmat; 1216 IS is_aux; 1217 PetscScalar *array,*array2; 1218 1219 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1220 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1221 1222 /* Extract constraints on R nodes: C_{CR} */ 1223 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1224 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1225 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1226 1227 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1228 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1229 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1230 for (i=0;i<n_constraints;i++) { 1231 const PetscScalar *row_cmat_values; 1232 const PetscInt *row_cmat_indices; 1233 PetscInt size_of_constraint,j; 1234 1235 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1236 for (j=0;j<size_of_constraint;j++) { 1237 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 1238 } 1239 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1240 } 1241 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1242 if (F) { 1243 Mat B; 1244 1245 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1246 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1247 ierr = MatDestroy(&B);CHKERRQ(ierr); 1248 } else { 1249 PetscScalar *marr; 1250 1251 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1252 for (i=0;i<n_constraints;i++) { 1253 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1254 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 1255 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1256 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1257 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1258 } 1259 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1260 } 1261 if (!pcbddc->switch_static) { 1262 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1263 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1264 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1265 for (i=0;i<n_constraints;i++) { 1266 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 1267 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1268 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1271 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1272 } 1273 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1274 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1275 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1276 } else { 1277 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1278 pcbddc->local_auxmat2 = local_auxmat2_R; 1279 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1280 } 1281 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1282 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1283 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1284 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1285 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1286 if (isCHOL) { 1287 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1288 } else { 1289 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1290 } 1291 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1292 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1293 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1294 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1295 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1296 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1297 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1298 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1299 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1300 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1301 } 1302 /* Get submatrices from subdomain matrix */ 1303 if (pcbddc->benign_n) { 1304 IS dummy; 1305 Mat B0_R; 1306 PetscReal norm; 1307 1308 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1309 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1310 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1311 if (norm > PETSC_SMALL) { 1312 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1313 } 1314 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1315 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1316 } 1317 1318 if (n_vertices) { 1319 IS is_aux; 1320 1321 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1322 IS tis; 1323 1324 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1325 ierr = ISSort(tis);CHKERRQ(ierr); 1326 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1327 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1328 } else { 1329 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1330 } 1331 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1332 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1333 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1334 if (pcbddc->benign_n) { 1335 IS dummy; 1336 1337 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1338 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1339 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1340 } 1341 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1342 } 1343 1344 /* Matrix of coarse basis functions (local) */ 1345 if (pcbddc->coarse_phi_B) { 1346 PetscInt on_B,on_primal,on_D=n_D; 1347 if (pcbddc->coarse_phi_D) { 1348 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1349 } 1350 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1351 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1352 PetscScalar *marray; 1353 1354 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1355 ierr = PetscFree(marray);CHKERRQ(ierr); 1356 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1357 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1358 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1359 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1360 } 1361 } 1362 1363 if (!pcbddc->coarse_phi_B) { 1364 PetscScalar *marray; 1365 1366 n = n_B*pcbddc->local_primal_size; 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 n += n_D*pcbddc->local_primal_size; 1369 } 1370 if (!pcbddc->symmetric_primal) { 1371 n *= 2; 1372 } 1373 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1374 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1375 n = n_B*pcbddc->local_primal_size; 1376 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1377 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1378 n += n_D*pcbddc->local_primal_size; 1379 } 1380 if (!pcbddc->symmetric_primal) { 1381 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1382 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1383 n = n_B*pcbddc->local_primal_size; 1384 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1385 } 1386 } else { 1387 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1388 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1389 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1390 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1391 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1392 } 1393 } 1394 } 1395 1396 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1397 p0_lidx_I = NULL; 1398 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1399 const PetscInt *idxs; 1400 1401 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1402 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1403 for (i=0;i<pcbddc->benign_n;i++) { 1404 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1405 } 1406 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1407 } 1408 1409 /* vertices */ 1410 if (n_vertices) { 1411 1412 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1413 1414 if (n_R) { 1415 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1416 PetscBLASInt B_N,B_one = 1; 1417 PetscScalar *x,*y; 1418 PetscBool isseqaij; 1419 1420 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1421 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1422 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1423 if (F) { /* TODO could be optimized for symmetric problems */ 1424 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1425 } else { 1426 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1427 for (i=0;i<n_vertices;i++) { 1428 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1429 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1430 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1431 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1432 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1433 } 1434 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1435 } 1436 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1437 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1438 if (n_constraints) { 1439 Mat B; 1440 1441 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1442 for (i=0;i<n_vertices;i++) { 1443 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1444 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1445 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1446 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1447 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1448 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1449 } 1450 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1451 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1452 ierr = MatDestroy(&B);CHKERRQ(ierr); 1453 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1454 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1455 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1456 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1457 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1458 ierr = MatDestroy(&B);CHKERRQ(ierr); 1459 } 1460 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1461 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1462 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1463 } 1464 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1465 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1466 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1468 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1469 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1470 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1471 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1472 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1473 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1474 } else { 1475 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1476 } 1477 for (i=0;i<pcbddc->benign_n;i++) { 1478 const PetscScalar *vals; 1479 const PetscInt *idxs; 1480 PetscInt n,j,primal_idx; 1481 1482 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1483 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1484 for (j=0;j<n;j++) { 1485 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1486 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1487 } 1488 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1489 } 1490 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1491 1492 /* coarse basis functions */ 1493 for (i=0;i<n_vertices;i++) { 1494 PetscScalar *y; 1495 1496 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1497 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1498 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 y[n_B*i+idx_V_B[i]] = 1.0; 1502 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1503 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1504 1505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1506 PetscInt j; 1507 1508 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1509 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1510 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1513 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1514 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1515 } 1516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1517 } 1518 /* if n_R == 0 the object is not destroyed */ 1519 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1520 } 1521 1522 if (n_constraints) { 1523 Mat B; 1524 1525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1526 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1527 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1528 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1529 if (n_vertices) { 1530 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1531 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1532 } else { 1533 Mat S_VCt; 1534 1535 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1536 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1537 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1538 } 1539 } 1540 ierr = MatDestroy(&B);CHKERRQ(ierr); 1541 /* coarse basis functions */ 1542 for (i=0;i<n_constraints;i++) { 1543 PetscScalar *y; 1544 1545 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1546 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1547 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1548 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1551 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1552 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1553 PetscInt j; 1554 1555 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1556 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1557 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1559 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1560 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1561 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1562 } 1563 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1564 } 1565 } 1566 if (n_constraints) { 1567 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1568 } 1569 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1570 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1571 1572 /* compute other basis functions for non-symmetric problems */ 1573 if (!pcbddc->symmetric_primal) { 1574 1575 if (n_constraints) { 1576 Mat S_CCT,B_C; 1577 1578 /* this is a lazy thing */ 1579 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1580 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1581 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1582 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1583 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1584 if (n_vertices) { 1585 Mat B_V,S_VCT; 1586 1587 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1588 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1589 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1590 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1591 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1592 } 1593 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1594 } else { /* if there are no constraints, reset work */ 1595 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1596 } 1597 if (n_vertices && n_R) { 1598 Mat A_VRT; 1599 PetscScalar *marray; 1600 PetscBLASInt B_N,B_one = 1; 1601 1602 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1603 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1604 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1605 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1606 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1607 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1608 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1609 } 1610 1611 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1612 for (i=0;i<pcbddc->local_primal_size;i++) { 1613 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1614 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1615 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1616 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1617 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1618 } 1619 } else { 1620 for (i=0;i<pcbddc->local_primal_size;i++) { 1621 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1622 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1623 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1626 } 1627 } 1628 /* coarse basis functions */ 1629 for (i=0;i<pcbddc->local_primal_size;i++) { 1630 PetscScalar *y; 1631 1632 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1633 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1634 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1635 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 if (i<n_vertices) { 1638 y[n_B*i+idx_V_B[i]] = 1.0; 1639 } 1640 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1641 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1642 1643 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1644 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1645 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1646 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1648 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1649 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1650 } 1651 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1652 } 1653 } 1654 /* free memory */ 1655 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1656 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1657 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1658 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1659 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1660 ierr = PetscFree(work);CHKERRQ(ierr); 1661 if (n_vertices) { 1662 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1663 } 1664 if (n_constraints) { 1665 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1666 } 1667 /* Checking coarse_sub_mat and coarse basis functios */ 1668 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1669 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1670 if (pcbddc->dbg_flag) { 1671 Mat coarse_sub_mat; 1672 Mat AUXMAT,TM1,TM2,TM3,TM4; 1673 Mat coarse_phi_D,coarse_phi_B; 1674 Mat coarse_psi_D,coarse_psi_B; 1675 Mat A_II,A_BB,A_IB,A_BI; 1676 Mat C_B,CPHI; 1677 IS is_dummy; 1678 Vec mones; 1679 MatType checkmattype=MATSEQAIJ; 1680 PetscReal real_value; 1681 1682 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1683 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1684 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1685 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1686 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1687 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1688 if (unsymmetric_check) { 1689 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1690 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1691 } 1692 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1693 1694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1695 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1696 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1697 if (unsymmetric_check) { 1698 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1699 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1700 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1701 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1702 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1703 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1704 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1705 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1706 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1707 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1708 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1709 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1710 } else { 1711 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1712 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1713 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1714 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1715 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1716 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1717 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1718 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1719 } 1720 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1721 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1722 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1723 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1724 if (pcbddc->benign_n) { 1725 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1726 PetscScalar *data,*data2; 1727 PetscInt j; 1728 1729 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1730 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1731 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1732 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1733 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1734 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1735 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1736 for (j=0;j<pcbddc->benign_n;j++) { 1737 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1738 for (i=0;i<pcbddc->local_primal_size;i++) { 1739 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1740 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1741 } 1742 } 1743 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1744 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1745 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1746 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1747 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1748 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1749 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1750 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1751 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1752 } 1753 #if 0 1754 { 1755 PetscViewer viewer; 1756 char filename[256]; 1757 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1758 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1759 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1760 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1761 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1762 } 1763 #endif 1764 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1765 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1766 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1767 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1768 1769 /* check constraints */ 1770 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1771 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1772 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1773 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1774 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1775 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1776 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1777 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1779 if (unsymmetric_check) { 1780 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1781 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1782 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1783 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1784 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1785 } 1786 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1787 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1788 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1789 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1790 } 1791 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1792 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1793 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1794 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1795 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1796 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1797 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1798 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1799 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1800 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1801 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1802 if (unsymmetric_check) { 1803 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1804 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1805 } 1806 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1807 } 1808 /* get back data */ 1809 *coarse_submat_vals_n = coarse_submat_vals; 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1815 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1816 { 1817 Mat *work_mat; 1818 IS isrow_s,iscol_s; 1819 PetscBool rsorted,csorted; 1820 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1825 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1826 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1827 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1828 1829 if (!rsorted) { 1830 const PetscInt *idxs; 1831 PetscInt *idxs_sorted,i; 1832 1833 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1834 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1835 for (i=0;i<rsize;i++) { 1836 idxs_perm_r[i] = i; 1837 } 1838 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1839 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1840 for (i=0;i<rsize;i++) { 1841 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1842 } 1843 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1844 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1845 } else { 1846 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1847 isrow_s = isrow; 1848 } 1849 1850 if (!csorted) { 1851 if (isrow == iscol) { 1852 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1853 iscol_s = isrow_s; 1854 } else { 1855 const PetscInt *idxs; 1856 PetscInt *idxs_sorted,i; 1857 1858 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1859 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1860 for (i=0;i<csize;i++) { 1861 idxs_perm_c[i] = i; 1862 } 1863 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1864 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1865 for (i=0;i<csize;i++) { 1866 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1867 } 1868 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1869 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1870 } 1871 } else { 1872 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1873 iscol_s = iscol; 1874 } 1875 1876 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1877 1878 if (!rsorted || !csorted) { 1879 Mat new_mat; 1880 IS is_perm_r,is_perm_c; 1881 1882 if (!rsorted) { 1883 PetscInt *idxs_r,i; 1884 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1885 for (i=0;i<rsize;i++) { 1886 idxs_r[idxs_perm_r[i]] = i; 1887 } 1888 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1889 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1890 } else { 1891 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1892 } 1893 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1894 1895 if (!csorted) { 1896 if (isrow_s == iscol_s) { 1897 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1898 is_perm_c = is_perm_r; 1899 } else { 1900 PetscInt *idxs_c,i; 1901 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1902 for (i=0;i<csize;i++) { 1903 idxs_c[idxs_perm_c[i]] = i; 1904 } 1905 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1906 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1907 } 1908 } else { 1909 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1910 } 1911 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1912 1913 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1914 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1915 work_mat[0] = new_mat; 1916 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1917 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1918 } 1919 1920 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1921 *B = work_mat[0]; 1922 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1923 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1924 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1925 PetscFunctionReturn(0); 1926 } 1927 1928 #undef __FUNCT__ 1929 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1930 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1931 { 1932 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1933 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1934 Mat new_mat; 1935 IS is_local,is_global; 1936 PetscInt local_size; 1937 PetscBool isseqaij; 1938 PetscErrorCode ierr; 1939 1940 PetscFunctionBegin; 1941 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1942 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1943 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1944 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1945 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1946 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1947 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1948 1949 /* check */ 1950 if (pcbddc->dbg_flag) { 1951 Vec x,x_change; 1952 PetscReal error; 1953 1954 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1955 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1956 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1957 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1959 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1960 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1961 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1962 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1963 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1964 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1965 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1966 ierr = VecDestroy(&x);CHKERRQ(ierr); 1967 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1968 } 1969 1970 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1971 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1972 if (isseqaij) { 1973 Mat M; 1974 1975 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1976 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1977 ierr = MatDestroy(&M);CHKERRQ(ierr); 1978 } else { 1979 Mat work_mat,M; 1980 1981 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1982 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1983 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1984 ierr = MatDestroy(&M);CHKERRQ(ierr); 1985 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1986 } 1987 if (matis->A->symmetric_set) { 1988 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1989 #if !defined(PETSC_USE_COMPLEX) 1990 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1991 #endif 1992 } 1993 /* 1994 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1995 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1996 */ 1997 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2003 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2004 { 2005 PC_IS* pcis = (PC_IS*)(pc->data); 2006 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2007 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2008 PetscInt *idx_R_local=NULL; 2009 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2010 PetscInt vbs,bs; 2011 PetscBT bitmask=NULL; 2012 PetscErrorCode ierr; 2013 2014 PetscFunctionBegin; 2015 /* 2016 No need to setup local scatters if 2017 - primal space is unchanged 2018 AND 2019 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2020 AND 2021 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2022 */ 2023 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2024 PetscFunctionReturn(0); 2025 } 2026 /* destroy old objects */ 2027 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2028 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2029 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2030 /* Set Non-overlapping dimensions */ 2031 n_B = pcis->n_B; 2032 n_D = pcis->n - n_B; 2033 n_vertices = pcbddc->n_vertices; 2034 2035 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2036 2037 /* create auxiliary bitmask and allocate workspace */ 2038 if (!sub_schurs->reuse_mumps) { 2039 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2040 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2041 for (i=0;i<n_vertices;i++) { 2042 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2043 } 2044 2045 for (i=0, n_R=0; i<pcis->n; i++) { 2046 if (!PetscBTLookup(bitmask,i)) { 2047 idx_R_local[n_R++] = i; 2048 } 2049 } 2050 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2051 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2052 2053 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2054 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2055 } 2056 2057 /* Block code */ 2058 vbs = 1; 2059 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2060 if (bs>1 && !(n_vertices%bs)) { 2061 PetscBool is_blocked = PETSC_TRUE; 2062 PetscInt *vary; 2063 if (!sub_schurs->reuse_mumps) { 2064 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2065 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2066 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2067 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2068 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2069 for (i=0; i<pcis->n/bs; i++) { 2070 if (vary[i]!=0 && vary[i]!=bs) { 2071 is_blocked = PETSC_FALSE; 2072 break; 2073 } 2074 } 2075 ierr = PetscFree(vary);CHKERRQ(ierr); 2076 } else { 2077 /* Verify directly the R set */ 2078 for (i=0; i<n_R/bs; i++) { 2079 PetscInt j,node=idx_R_local[bs*i]; 2080 for (j=1; j<bs; j++) { 2081 if (node != idx_R_local[bs*i+j]-j) { 2082 is_blocked = PETSC_FALSE; 2083 break; 2084 } 2085 } 2086 } 2087 } 2088 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2089 vbs = bs; 2090 for (i=0;i<n_R/vbs;i++) { 2091 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2092 } 2093 } 2094 } 2095 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2096 if (sub_schurs->reuse_mumps) { 2097 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2098 2099 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2100 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2101 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2102 reuse_mumps->is_R = pcbddc->is_R_local; 2103 } else { 2104 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2105 } 2106 2107 /* print some info if requested */ 2108 if (pcbddc->dbg_flag) { 2109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2110 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2111 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2114 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2116 } 2117 2118 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2119 if (!sub_schurs->reuse_mumps) { 2120 IS is_aux1,is_aux2; 2121 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2122 2123 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2124 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2125 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2126 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2127 for (i=0; i<n_D; i++) { 2128 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2129 } 2130 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2131 for (i=0, j=0; i<n_R; i++) { 2132 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2133 aux_array1[j++] = i; 2134 } 2135 } 2136 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2137 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2138 for (i=0, j=0; i<n_B; i++) { 2139 if (!PetscBTLookup(bitmask,is_indices[i])) { 2140 aux_array2[j++] = i; 2141 } 2142 } 2143 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2144 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2145 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2146 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2147 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2148 2149 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2150 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2151 for (i=0, j=0; i<n_R; i++) { 2152 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2153 aux_array1[j++] = i; 2154 } 2155 } 2156 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2157 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2158 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2159 } 2160 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2161 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2162 } else { 2163 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2164 IS tis; 2165 PetscInt schur_size; 2166 2167 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2168 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2169 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2170 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2171 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2172 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2173 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2174 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2175 } 2176 } 2177 PetscFunctionReturn(0); 2178 } 2179 2180 2181 #undef __FUNCT__ 2182 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2183 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2184 { 2185 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2186 PC_IS *pcis = (PC_IS*)pc->data; 2187 PC pc_temp; 2188 Mat A_RR; 2189 MatReuse reuse; 2190 PetscScalar m_one = -1.0; 2191 PetscReal value; 2192 PetscInt n_D,n_R; 2193 PetscBool use_exact,use_exact_reduced,issbaij; 2194 PetscErrorCode ierr; 2195 /* prefixes stuff */ 2196 char dir_prefix[256],neu_prefix[256],str_level[16]; 2197 size_t len; 2198 2199 PetscFunctionBegin; 2200 2201 /* compute prefixes */ 2202 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2203 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2204 if (!pcbddc->current_level) { 2205 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2206 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2207 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2208 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2209 } else { 2210 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2211 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2212 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2213 len -= 15; /* remove "pc_bddc_coarse_" */ 2214 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2215 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2216 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2217 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2218 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2219 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2220 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2221 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2222 } 2223 2224 /* DIRICHLET PROBLEM */ 2225 if (dirichlet) { 2226 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2227 if (pcbddc->local_mat->symmetric_set) { 2228 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2229 } 2230 /* Matrix for Dirichlet problem is pcis->A_II */ 2231 n_D = pcis->n - pcis->n_B; 2232 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2233 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2234 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2235 /* default */ 2236 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2237 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2238 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2239 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2240 if (issbaij) { 2241 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2242 } else { 2243 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2244 } 2245 /* Allow user's customization */ 2246 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2247 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2248 } 2249 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2250 if (sub_schurs->reuse_mumps) { 2251 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2252 2253 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2254 } 2255 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2256 if (!n_D) { 2257 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2258 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2259 } 2260 /* Set Up KSP for Dirichlet problem of BDDC */ 2261 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2262 /* set ksp_D into pcis data */ 2263 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2264 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2265 pcis->ksp_D = pcbddc->ksp_D; 2266 } 2267 2268 /* NEUMANN PROBLEM */ 2269 A_RR = 0; 2270 if (neumann) { 2271 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2272 PetscInt ibs,mbs; 2273 PetscBool issbaij; 2274 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2275 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2276 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2277 if (pcbddc->ksp_R) { /* already created ksp */ 2278 PetscInt nn_R; 2279 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2280 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2281 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2282 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2283 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2284 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2285 reuse = MAT_INITIAL_MATRIX; 2286 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2287 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2288 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2289 reuse = MAT_INITIAL_MATRIX; 2290 } else { /* safe to reuse the matrix */ 2291 reuse = MAT_REUSE_MATRIX; 2292 } 2293 } 2294 /* last check */ 2295 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2296 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2297 reuse = MAT_INITIAL_MATRIX; 2298 } 2299 } else { /* first time, so we need to create the matrix */ 2300 reuse = MAT_INITIAL_MATRIX; 2301 } 2302 /* extract A_RR */ 2303 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2304 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2305 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2306 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2307 if (matis->A == pcbddc->local_mat) { 2308 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2309 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2310 } else { 2311 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2312 } 2313 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2314 if (matis->A == pcbddc->local_mat) { 2315 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2316 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2317 } else { 2318 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2319 } 2320 } 2321 if (!sub_schurs->reuse_mumps) { 2322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2323 if (pcbddc->local_mat->symmetric_set) { 2324 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2325 } 2326 } else { 2327 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2328 2329 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2330 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2331 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2332 } 2333 if (!pcbddc->ksp_R) { /* create object if not present */ 2334 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2335 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2336 /* default */ 2337 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2338 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2339 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2340 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2341 if (issbaij) { 2342 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2343 } else { 2344 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2345 } 2346 /* Allow user's customization */ 2347 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2348 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2349 } 2350 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2351 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2352 if (!n_R) { 2353 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2354 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2355 } 2356 /* Reuse MUMPS solver if it is present */ 2357 if (sub_schurs->reuse_mumps) { 2358 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2359 2360 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2361 } 2362 /* Set Up KSP for Neumann problem of BDDC */ 2363 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2364 } 2365 /* free Neumann problem's matrix */ 2366 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2367 2368 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2369 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2370 if (pcbddc->dbg_flag) { 2371 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2372 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2373 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2374 } 2375 if (dirichlet) { /* Dirichlet */ 2376 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2377 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2378 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2379 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2380 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2381 /* need to be adapted? */ 2382 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2383 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2384 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2385 /* print info */ 2386 if (pcbddc->dbg_flag) { 2387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2388 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2389 } 2390 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2391 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2392 } 2393 } 2394 if (neumann) { /* Neumann */ 2395 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2396 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2397 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2398 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2399 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2400 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2401 /* need to be adapted? */ 2402 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2403 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2404 /* print info */ 2405 if (pcbddc->dbg_flag) { 2406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2408 } 2409 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2410 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2411 } 2412 } 2413 } 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2419 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2420 { 2421 PetscErrorCode ierr; 2422 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2423 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2424 2425 PetscFunctionBegin; 2426 if (!sub_schurs->reuse_mumps) { 2427 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2428 } 2429 if (!pcbddc->switch_static) { 2430 if (applytranspose && pcbddc->local_auxmat1) { 2431 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2432 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2433 } 2434 if (!sub_schurs->reuse_mumps) { 2435 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2436 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2437 } else { 2438 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2439 2440 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2441 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2442 } 2443 } else { 2444 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2445 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2446 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2447 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2448 if (applytranspose && pcbddc->local_auxmat1) { 2449 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2450 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2451 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2452 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2453 } 2454 } 2455 if (!sub_schurs->reuse_mumps) { 2456 if (applytranspose) { 2457 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2458 } else { 2459 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2460 } 2461 #if defined(PETSC_HAVE_MUMPS) 2462 } else { 2463 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2464 2465 if (applytranspose) { 2466 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2467 } else { 2468 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2469 } 2470 #endif 2471 } 2472 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2473 if (!pcbddc->switch_static) { 2474 if (!sub_schurs->reuse_mumps) { 2475 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2476 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2477 } else { 2478 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2479 2480 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2481 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2482 } 2483 if (!applytranspose && pcbddc->local_auxmat1) { 2484 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2485 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2486 } 2487 } else { 2488 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2490 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2491 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2492 if (!applytranspose && pcbddc->local_auxmat1) { 2493 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2494 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2495 } 2496 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2498 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2505 #undef __FUNCT__ 2506 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2507 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2508 { 2509 PetscErrorCode ierr; 2510 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2511 PC_IS* pcis = (PC_IS*) (pc->data); 2512 const PetscScalar zero = 0.0; 2513 2514 PetscFunctionBegin; 2515 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2516 if (applytranspose) { 2517 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2518 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2519 } else { 2520 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2521 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2522 } 2523 2524 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2525 if (pcbddc->benign_n) { 2526 PetscScalar *array; 2527 PetscInt j; 2528 2529 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2530 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2531 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2532 } 2533 2534 /* start communications from local primal nodes to rhs of coarse solver */ 2535 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2536 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2537 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2538 2539 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2540 /* TODO remove null space when doing multilevel */ 2541 if (pcbddc->coarse_ksp) { 2542 Mat coarse_mat; 2543 Vec rhs,sol; 2544 MatNullSpace nullsp; 2545 2546 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2547 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2548 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2549 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2550 if (nullsp) { 2551 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2552 } 2553 if (applytranspose) { 2554 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2555 } else { 2556 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2557 } 2558 if (nullsp) { 2559 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2560 } 2561 } 2562 2563 /* Local solution on R nodes */ 2564 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2565 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2566 } 2567 2568 /* communications from coarse sol to local primal nodes */ 2569 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2570 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2571 2572 /* Sum contributions from two levels */ 2573 if (applytranspose) { 2574 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2575 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2576 } else { 2577 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2578 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2579 } 2580 /* store p0 */ 2581 if (pcbddc->benign_n) { 2582 PetscScalar *array; 2583 PetscInt j; 2584 2585 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2586 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2587 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2588 } 2589 PetscFunctionReturn(0); 2590 } 2591 2592 #undef __FUNCT__ 2593 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2594 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2595 { 2596 PetscErrorCode ierr; 2597 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2598 PetscScalar *array; 2599 Vec from,to; 2600 2601 PetscFunctionBegin; 2602 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2603 from = pcbddc->coarse_vec; 2604 to = pcbddc->vec1_P; 2605 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2606 Vec tvec; 2607 2608 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2609 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2610 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2611 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2612 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2613 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2614 } 2615 } else { /* from local to global -> put data in coarse right hand side */ 2616 from = pcbddc->vec1_P; 2617 to = pcbddc->coarse_vec; 2618 } 2619 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2620 PetscFunctionReturn(0); 2621 } 2622 2623 #undef __FUNCT__ 2624 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2625 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2626 { 2627 PetscErrorCode ierr; 2628 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2629 PetscScalar *array; 2630 Vec from,to; 2631 2632 PetscFunctionBegin; 2633 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2634 from = pcbddc->coarse_vec; 2635 to = pcbddc->vec1_P; 2636 } else { /* from local to global -> put data in coarse right hand side */ 2637 from = pcbddc->vec1_P; 2638 to = pcbddc->coarse_vec; 2639 } 2640 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2641 if (smode == SCATTER_FORWARD) { 2642 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2643 Vec tvec; 2644 2645 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2646 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2647 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2648 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2649 } 2650 } else { 2651 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2652 ierr = VecResetArray(from);CHKERRQ(ierr); 2653 } 2654 } 2655 PetscFunctionReturn(0); 2656 } 2657 2658 /* uncomment for testing purposes */ 2659 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2660 #undef __FUNCT__ 2661 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2662 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2663 { 2664 PetscErrorCode ierr; 2665 PC_IS* pcis = (PC_IS*)(pc->data); 2666 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2667 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2668 /* one and zero */ 2669 PetscScalar one=1.0,zero=0.0; 2670 /* space to store constraints and their local indices */ 2671 PetscScalar *constraints_data; 2672 PetscInt *constraints_idxs,*constraints_idxs_B; 2673 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2674 PetscInt *constraints_n; 2675 /* iterators */ 2676 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2677 /* BLAS integers */ 2678 PetscBLASInt lwork,lierr; 2679 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2680 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2681 /* reuse */ 2682 PetscInt olocal_primal_size,olocal_primal_size_cc; 2683 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2684 /* change of basis */ 2685 PetscBool qr_needed; 2686 PetscBT change_basis,qr_needed_idx; 2687 /* auxiliary stuff */ 2688 PetscInt *nnz,*is_indices; 2689 PetscInt ncc; 2690 /* some quantities */ 2691 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2692 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2693 2694 PetscFunctionBegin; 2695 /* Destroy Mat objects computed previously */ 2696 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2697 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2698 /* save info on constraints from previous setup (if any) */ 2699 olocal_primal_size = pcbddc->local_primal_size; 2700 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2701 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2702 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2703 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2704 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2705 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2706 2707 /* print some info */ 2708 if (pcbddc->dbg_flag) { 2709 IS vertices; 2710 PetscInt nv,nedges,nfaces; 2711 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2712 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2713 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2714 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2719 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2720 } 2721 2722 if (!pcbddc->adaptive_selection) { 2723 IS ISForVertices,*ISForFaces,*ISForEdges; 2724 MatNullSpace nearnullsp; 2725 const Vec *nearnullvecs; 2726 Vec *localnearnullsp; 2727 PetscScalar *array; 2728 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2729 PetscBool nnsp_has_cnst; 2730 /* LAPACK working arrays for SVD or POD */ 2731 PetscBool skip_lapack,boolforchange; 2732 PetscScalar *work; 2733 PetscReal *singular_vals; 2734 #if defined(PETSC_USE_COMPLEX) 2735 PetscReal *rwork; 2736 #endif 2737 #if defined(PETSC_MISSING_LAPACK_GESVD) 2738 PetscScalar *temp_basis,*correlation_mat; 2739 #else 2740 PetscBLASInt dummy_int=1; 2741 PetscScalar dummy_scalar=1.; 2742 #endif 2743 2744 /* Get index sets for faces, edges and vertices from graph */ 2745 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2746 /* free unneeded index sets */ 2747 if (!pcbddc->use_vertices) { 2748 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2749 } 2750 if (!pcbddc->use_edges) { 2751 for (i=0;i<n_ISForEdges;i++) { 2752 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2753 } 2754 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2755 n_ISForEdges = 0; 2756 } 2757 if (!pcbddc->use_faces) { 2758 for (i=0;i<n_ISForFaces;i++) { 2759 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2760 } 2761 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2762 n_ISForFaces = 0; 2763 } 2764 2765 #if defined(PETSC_USE_DEBUG) 2766 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2767 Also use_change_of_basis should be consistent among processors */ 2768 if (pcbddc->NullSpace) { 2769 PetscBool tbool[2],gbool[2]; 2770 2771 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2772 pcbddc->use_change_of_basis = PETSC_TRUE; 2773 if (!ISForEdges) { 2774 pcbddc->use_change_on_faces = PETSC_TRUE; 2775 } 2776 } 2777 tbool[0] = pcbddc->use_change_of_basis; 2778 tbool[1] = pcbddc->use_change_on_faces; 2779 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2780 pcbddc->use_change_of_basis = gbool[0]; 2781 pcbddc->use_change_on_faces = gbool[1]; 2782 } 2783 #endif 2784 2785 /* check if near null space is attached to global mat */ 2786 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2787 if (nearnullsp) { 2788 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2789 /* remove any stored info */ 2790 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2791 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2792 /* store information for BDDC solver reuse */ 2793 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2794 pcbddc->onearnullspace = nearnullsp; 2795 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2796 for (i=0;i<nnsp_size;i++) { 2797 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2798 } 2799 } else { /* if near null space is not provided BDDC uses constants by default */ 2800 nnsp_size = 0; 2801 nnsp_has_cnst = PETSC_TRUE; 2802 } 2803 /* get max number of constraints on a single cc */ 2804 max_constraints = nnsp_size; 2805 if (nnsp_has_cnst) max_constraints++; 2806 2807 /* 2808 Evaluate maximum storage size needed by the procedure 2809 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2810 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2811 There can be multiple constraints per connected component 2812 */ 2813 n_vertices = 0; 2814 if (ISForVertices) { 2815 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2816 } 2817 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2818 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2819 2820 total_counts = n_ISForFaces+n_ISForEdges; 2821 total_counts *= max_constraints; 2822 total_counts += n_vertices; 2823 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2824 2825 total_counts = 0; 2826 max_size_of_constraint = 0; 2827 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2828 IS used_is; 2829 if (i<n_ISForEdges) { 2830 used_is = ISForEdges[i]; 2831 } else { 2832 used_is = ISForFaces[i-n_ISForEdges]; 2833 } 2834 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2835 total_counts += j; 2836 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2837 } 2838 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2839 2840 /* get local part of global near null space vectors */ 2841 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2842 for (k=0;k<nnsp_size;k++) { 2843 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2844 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2846 } 2847 2848 /* whether or not to skip lapack calls */ 2849 skip_lapack = PETSC_TRUE; 2850 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2851 2852 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2853 if (!skip_lapack) { 2854 PetscScalar temp_work; 2855 2856 #if defined(PETSC_MISSING_LAPACK_GESVD) 2857 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2858 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2859 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2860 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2861 #if defined(PETSC_USE_COMPLEX) 2862 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2863 #endif 2864 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2865 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2866 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2867 lwork = -1; 2868 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2869 #if !defined(PETSC_USE_COMPLEX) 2870 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2871 #else 2872 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2873 #endif 2874 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2875 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2876 #else /* on missing GESVD */ 2877 /* SVD */ 2878 PetscInt max_n,min_n; 2879 max_n = max_size_of_constraint; 2880 min_n = max_constraints; 2881 if (max_size_of_constraint < max_constraints) { 2882 min_n = max_size_of_constraint; 2883 max_n = max_constraints; 2884 } 2885 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2886 #if defined(PETSC_USE_COMPLEX) 2887 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2888 #endif 2889 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2890 lwork = -1; 2891 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2892 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2893 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2894 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2895 #if !defined(PETSC_USE_COMPLEX) 2896 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2897 #else 2898 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2899 #endif 2900 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2901 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2902 #endif /* on missing GESVD */ 2903 /* Allocate optimal workspace */ 2904 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2905 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2906 } 2907 /* Now we can loop on constraining sets */ 2908 total_counts = 0; 2909 constraints_idxs_ptr[0] = 0; 2910 constraints_data_ptr[0] = 0; 2911 /* vertices */ 2912 if (n_vertices) { 2913 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2914 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2915 for (i=0;i<n_vertices;i++) { 2916 constraints_n[total_counts] = 1; 2917 constraints_data[total_counts] = 1.0; 2918 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2919 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2920 total_counts++; 2921 } 2922 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2923 n_vertices = total_counts; 2924 } 2925 2926 /* edges and faces */ 2927 total_counts_cc = total_counts; 2928 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2929 IS used_is; 2930 PetscBool idxs_copied = PETSC_FALSE; 2931 2932 if (ncc<n_ISForEdges) { 2933 used_is = ISForEdges[ncc]; 2934 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2935 } else { 2936 used_is = ISForFaces[ncc-n_ISForEdges]; 2937 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2938 } 2939 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2940 2941 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2942 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2943 /* change of basis should not be performed on local periodic nodes */ 2944 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2945 if (nnsp_has_cnst) { 2946 PetscScalar quad_value; 2947 2948 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2949 idxs_copied = PETSC_TRUE; 2950 2951 if (!pcbddc->use_nnsp_true) { 2952 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2953 } else { 2954 quad_value = 1.0; 2955 } 2956 for (j=0;j<size_of_constraint;j++) { 2957 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2958 } 2959 temp_constraints++; 2960 total_counts++; 2961 } 2962 for (k=0;k<nnsp_size;k++) { 2963 PetscReal real_value; 2964 PetscScalar *ptr_to_data; 2965 2966 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2967 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2968 for (j=0;j<size_of_constraint;j++) { 2969 ptr_to_data[j] = array[is_indices[j]]; 2970 } 2971 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2972 /* check if array is null on the connected component */ 2973 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2974 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2975 if (real_value > 0.0) { /* keep indices and values */ 2976 temp_constraints++; 2977 total_counts++; 2978 if (!idxs_copied) { 2979 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2980 idxs_copied = PETSC_TRUE; 2981 } 2982 } 2983 } 2984 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2985 valid_constraints = temp_constraints; 2986 if (!pcbddc->use_nnsp_true && temp_constraints) { 2987 if (temp_constraints == 1) { /* just normalize the constraint */ 2988 PetscScalar norm,*ptr_to_data; 2989 2990 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2991 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2992 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2993 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2994 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2995 } else { /* perform SVD */ 2996 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2997 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2998 2999 #if defined(PETSC_MISSING_LAPACK_GESVD) 3000 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3001 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3002 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3003 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3004 from that computed using LAPACKgesvd 3005 -> This is due to a different computation of eigenvectors in LAPACKheev 3006 -> The quality of the POD-computed basis will be the same */ 3007 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3008 /* Store upper triangular part of correlation matrix */ 3009 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3010 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3011 for (j=0;j<temp_constraints;j++) { 3012 for (k=0;k<j+1;k++) { 3013 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3014 } 3015 } 3016 /* compute eigenvalues and eigenvectors of correlation matrix */ 3017 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3018 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3019 #if !defined(PETSC_USE_COMPLEX) 3020 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3021 #else 3022 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3023 #endif 3024 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3025 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3026 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3027 j = 0; 3028 while (j < temp_constraints && singular_vals[j] < tol) j++; 3029 total_counts = total_counts-j; 3030 valid_constraints = temp_constraints-j; 3031 /* scale and copy POD basis into used quadrature memory */ 3032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3034 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3036 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3038 if (j<temp_constraints) { 3039 PetscInt ii; 3040 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3041 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3042 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3043 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3044 for (k=0;k<temp_constraints-j;k++) { 3045 for (ii=0;ii<size_of_constraint;ii++) { 3046 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3047 } 3048 } 3049 } 3050 #else /* on missing GESVD */ 3051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3052 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3054 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3055 #if !defined(PETSC_USE_COMPLEX) 3056 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3057 #else 3058 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3059 #endif 3060 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3061 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3062 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3063 k = temp_constraints; 3064 if (k > size_of_constraint) k = size_of_constraint; 3065 j = 0; 3066 while (j < k && singular_vals[k-j-1] < tol) j++; 3067 valid_constraints = k-j; 3068 total_counts = total_counts-temp_constraints+valid_constraints; 3069 #endif /* on missing GESVD */ 3070 } 3071 } 3072 /* update pointers information */ 3073 if (valid_constraints) { 3074 constraints_n[total_counts_cc] = valid_constraints; 3075 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3076 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3077 /* set change_of_basis flag */ 3078 if (boolforchange) { 3079 PetscBTSet(change_basis,total_counts_cc); 3080 } 3081 total_counts_cc++; 3082 } 3083 } 3084 /* free workspace */ 3085 if (!skip_lapack) { 3086 ierr = PetscFree(work);CHKERRQ(ierr); 3087 #if defined(PETSC_USE_COMPLEX) 3088 ierr = PetscFree(rwork);CHKERRQ(ierr); 3089 #endif 3090 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3091 #if defined(PETSC_MISSING_LAPACK_GESVD) 3092 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3093 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3094 #endif 3095 } 3096 for (k=0;k<nnsp_size;k++) { 3097 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3098 } 3099 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3100 /* free index sets of faces, edges and vertices */ 3101 for (i=0;i<n_ISForFaces;i++) { 3102 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3103 } 3104 if (n_ISForFaces) { 3105 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3106 } 3107 for (i=0;i<n_ISForEdges;i++) { 3108 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3109 } 3110 if (n_ISForEdges) { 3111 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3112 } 3113 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3114 } else { 3115 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3116 3117 total_counts = 0; 3118 n_vertices = 0; 3119 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3120 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3121 } 3122 max_constraints = 0; 3123 total_counts_cc = 0; 3124 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3125 total_counts += pcbddc->adaptive_constraints_n[i]; 3126 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3127 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3128 } 3129 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3130 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3131 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3132 constraints_data = pcbddc->adaptive_constraints_data; 3133 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3134 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3135 total_counts_cc = 0; 3136 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3137 if (pcbddc->adaptive_constraints_n[i]) { 3138 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3139 } 3140 } 3141 #if 0 3142 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3143 for (i=0;i<total_counts_cc;i++) { 3144 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3145 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3146 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3147 printf(" %d",constraints_idxs[j]); 3148 } 3149 printf("\n"); 3150 printf("number of cc: %d\n",constraints_n[i]); 3151 } 3152 for (i=0;i<n_vertices;i++) { 3153 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3154 } 3155 for (i=0;i<sub_schurs->n_subs;i++) { 3156 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3157 } 3158 #endif 3159 3160 max_size_of_constraint = 0; 3161 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3162 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3163 /* Change of basis */ 3164 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3165 if (pcbddc->use_change_of_basis) { 3166 for (i=0;i<sub_schurs->n_subs;i++) { 3167 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3168 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3169 } 3170 } 3171 } 3172 } 3173 pcbddc->local_primal_size = total_counts; 3174 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3175 3176 /* map constraints_idxs in boundary numbering */ 3177 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3178 if (i != constraints_idxs_ptr[total_counts_cc]) { 3179 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3180 } 3181 3182 /* Create constraint matrix */ 3183 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3184 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3185 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3186 3187 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3188 /* determine if a QR strategy is needed for change of basis */ 3189 qr_needed = PETSC_FALSE; 3190 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3191 total_primal_vertices=0; 3192 pcbddc->local_primal_size_cc = 0; 3193 for (i=0;i<total_counts_cc;i++) { 3194 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3195 if (size_of_constraint == 1) { 3196 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3197 pcbddc->local_primal_size_cc += 1; 3198 } else if (PetscBTLookup(change_basis,i)) { 3199 for (k=0;k<constraints_n[i];k++) { 3200 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3201 } 3202 pcbddc->local_primal_size_cc += constraints_n[i]; 3203 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3204 PetscBTSet(qr_needed_idx,i); 3205 qr_needed = PETSC_TRUE; 3206 } 3207 } else { 3208 pcbddc->local_primal_size_cc += 1; 3209 } 3210 } 3211 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3212 pcbddc->n_vertices = total_primal_vertices; 3213 /* permute indices in order to have a sorted set of vertices */ 3214 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3215 3216 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3217 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3218 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3219 3220 /* nonzero structure of constraint matrix */ 3221 /* and get reference dof for local constraints */ 3222 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3223 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3224 3225 j = total_primal_vertices; 3226 total_counts = total_primal_vertices; 3227 cum = total_primal_vertices; 3228 for (i=n_vertices;i<total_counts_cc;i++) { 3229 if (!PetscBTLookup(change_basis,i)) { 3230 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3231 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3232 cum++; 3233 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3234 for (k=0;k<constraints_n[i];k++) { 3235 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3236 nnz[j+k] = size_of_constraint; 3237 } 3238 j += constraints_n[i]; 3239 } 3240 } 3241 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3242 ierr = PetscFree(nnz);CHKERRQ(ierr); 3243 3244 /* set values in constraint matrix */ 3245 for (i=0;i<total_primal_vertices;i++) { 3246 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3247 } 3248 total_counts = total_primal_vertices; 3249 for (i=n_vertices;i<total_counts_cc;i++) { 3250 if (!PetscBTLookup(change_basis,i)) { 3251 PetscInt *cols; 3252 3253 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3254 cols = constraints_idxs+constraints_idxs_ptr[i]; 3255 for (k=0;k<constraints_n[i];k++) { 3256 PetscInt row = total_counts+k; 3257 PetscScalar *vals; 3258 3259 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3260 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3261 } 3262 total_counts += constraints_n[i]; 3263 } 3264 } 3265 /* assembling */ 3266 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3268 3269 /* 3270 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3271 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3272 */ 3273 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3274 if (pcbddc->use_change_of_basis) { 3275 /* dual and primal dofs on a single cc */ 3276 PetscInt dual_dofs,primal_dofs; 3277 /* working stuff for GEQRF */ 3278 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3279 PetscBLASInt lqr_work; 3280 /* working stuff for UNGQR */ 3281 PetscScalar *gqr_work,lgqr_work_t; 3282 PetscBLASInt lgqr_work; 3283 /* working stuff for TRTRS */ 3284 PetscScalar *trs_rhs; 3285 PetscBLASInt Blas_NRHS; 3286 /* pointers for values insertion into change of basis matrix */ 3287 PetscInt *start_rows,*start_cols; 3288 PetscScalar *start_vals; 3289 /* working stuff for values insertion */ 3290 PetscBT is_primal; 3291 PetscInt *aux_primal_numbering_B; 3292 /* matrix sizes */ 3293 PetscInt global_size,local_size; 3294 /* temporary change of basis */ 3295 Mat localChangeOfBasisMatrix; 3296 /* extra space for debugging */ 3297 PetscScalar *dbg_work; 3298 3299 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3300 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3301 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3302 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3303 /* nonzeros for local mat */ 3304 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3305 for (i=0;i<pcis->n;i++) nnz[i]=1; 3306 for (i=n_vertices;i<total_counts_cc;i++) { 3307 if (PetscBTLookup(change_basis,i)) { 3308 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3309 if (PetscBTLookup(qr_needed_idx,i)) { 3310 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3311 } else { 3312 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3313 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3314 } 3315 } 3316 } 3317 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3318 ierr = PetscFree(nnz);CHKERRQ(ierr); 3319 /* Set initial identity in the matrix */ 3320 for (i=0;i<pcis->n;i++) { 3321 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3322 } 3323 3324 if (pcbddc->dbg_flag) { 3325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3327 } 3328 3329 3330 /* Now we loop on the constraints which need a change of basis */ 3331 /* 3332 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3333 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3334 3335 Basic blocks of change of basis matrix T computed by 3336 3337 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3338 3339 | 1 0 ... 0 s_1/S | 3340 | 0 1 ... 0 s_2/S | 3341 | ... | 3342 | 0 ... 1 s_{n-1}/S | 3343 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3344 3345 with S = \sum_{i=1}^n s_i^2 3346 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3347 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3348 3349 - QR decomposition of constraints otherwise 3350 */ 3351 if (qr_needed) { 3352 /* space to store Q */ 3353 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3354 /* first we issue queries for optimal work */ 3355 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3356 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3357 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3358 lqr_work = -1; 3359 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3360 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3361 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3362 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3363 lgqr_work = -1; 3364 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3365 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3366 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3367 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3368 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3369 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3370 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3371 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3372 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3373 /* array to store scaling factors for reflectors */ 3374 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3375 /* array to store rhs and solution of triangular solver */ 3376 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3377 /* allocating workspace for check */ 3378 if (pcbddc->dbg_flag) { 3379 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3380 } 3381 } 3382 /* array to store whether a node is primal or not */ 3383 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3384 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3385 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3386 if (i != total_primal_vertices) { 3387 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3388 } 3389 for (i=0;i<total_primal_vertices;i++) { 3390 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3391 } 3392 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3393 3394 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3395 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3396 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3397 if (PetscBTLookup(change_basis,total_counts)) { 3398 /* get constraint info */ 3399 primal_dofs = constraints_n[total_counts]; 3400 dual_dofs = size_of_constraint-primal_dofs; 3401 3402 if (pcbddc->dbg_flag) { 3403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3404 } 3405 3406 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3407 3408 /* copy quadrature constraints for change of basis check */ 3409 if (pcbddc->dbg_flag) { 3410 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3411 } 3412 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3413 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3414 3415 /* compute QR decomposition of constraints */ 3416 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3417 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3419 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3420 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3421 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3422 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3423 3424 /* explictly compute R^-T */ 3425 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3426 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3427 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3428 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3430 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3431 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3432 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3433 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3434 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3435 3436 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3438 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3439 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3440 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3441 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3442 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3443 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3444 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3445 3446 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3447 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3448 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3449 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3450 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3451 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3452 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3453 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3454 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3455 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3456 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3457 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3458 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3459 3460 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3461 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3462 /* insert cols for primal dofs */ 3463 for (j=0;j<primal_dofs;j++) { 3464 start_vals = &qr_basis[j*size_of_constraint]; 3465 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3466 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3467 } 3468 /* insert cols for dual dofs */ 3469 for (j=0,k=0;j<dual_dofs;k++) { 3470 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3471 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3472 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3473 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3474 j++; 3475 } 3476 } 3477 3478 /* check change of basis */ 3479 if (pcbddc->dbg_flag) { 3480 PetscInt ii,jj; 3481 PetscBool valid_qr=PETSC_TRUE; 3482 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3487 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3488 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3489 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3490 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3491 for (jj=0;jj<size_of_constraint;jj++) { 3492 for (ii=0;ii<primal_dofs;ii++) { 3493 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3494 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3495 } 3496 } 3497 if (!valid_qr) { 3498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3499 for (jj=0;jj<size_of_constraint;jj++) { 3500 for (ii=0;ii<primal_dofs;ii++) { 3501 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3502 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3503 } 3504 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3505 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3506 } 3507 } 3508 } 3509 } else { 3510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3511 } 3512 } 3513 } else { /* simple transformation block */ 3514 PetscInt row,col; 3515 PetscScalar val,norm; 3516 3517 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3518 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3519 for (j=0;j<size_of_constraint;j++) { 3520 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3521 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3522 if (!PetscBTLookup(is_primal,row_B)) { 3523 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3524 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3525 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3526 } else { 3527 for (k=0;k<size_of_constraint;k++) { 3528 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3529 if (row != col) { 3530 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3531 } else { 3532 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3533 } 3534 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3535 } 3536 } 3537 } 3538 if (pcbddc->dbg_flag) { 3539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3540 } 3541 } 3542 } else { 3543 if (pcbddc->dbg_flag) { 3544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3545 } 3546 } 3547 } 3548 3549 /* free workspace */ 3550 if (qr_needed) { 3551 if (pcbddc->dbg_flag) { 3552 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3553 } 3554 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3555 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3556 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3557 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3558 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3559 } 3560 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3561 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 3564 /* assembling of global change of variable */ 3565 { 3566 Mat tmat; 3567 PetscInt bs; 3568 3569 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3570 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3571 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3572 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3573 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3574 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3575 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3576 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3577 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3578 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3579 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3580 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3581 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3582 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3583 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3584 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3585 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3586 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3587 } 3588 /* check */ 3589 if (pcbddc->dbg_flag) { 3590 PetscReal error; 3591 Vec x,x_change; 3592 3593 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3594 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3595 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3596 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3597 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3598 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3599 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3600 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3601 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3602 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3603 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3604 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3605 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3606 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3607 ierr = VecDestroy(&x);CHKERRQ(ierr); 3608 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3609 } 3610 3611 /* adapt sub_schurs computed (if any) */ 3612 if (pcbddc->use_deluxe_scaling) { 3613 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3614 if (sub_schurs->S_Ej_all) { 3615 Mat S_new,tmat; 3616 ISLocalToGlobalMapping NtoSall; 3617 IS is_all_N,is_V,is_V_Sall; 3618 const PetscScalar *array; 3619 const PetscInt *idxs_V,*idxs_all; 3620 PetscInt i,n_V; 3621 3622 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3623 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3624 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3625 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3626 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3627 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3628 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3629 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3630 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3631 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3632 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3633 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3634 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3635 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3636 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3637 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3638 for (i=0;i<n_V;i++) { 3639 PetscScalar val; 3640 PetscInt idx; 3641 3642 idx = idxs_V[i]; 3643 val = array[idxs_all[idxs_V[i]]]; 3644 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3645 } 3646 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 sub_schurs->S_Ej_all = S_new; 3649 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3650 if (sub_schurs->sum_S_Ej_all) { 3651 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3652 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3653 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3654 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3655 sub_schurs->sum_S_Ej_all = S_new; 3656 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3657 } 3658 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3659 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3660 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3661 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3662 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3663 } 3664 } 3665 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3666 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3667 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3668 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3669 } 3670 3671 /* set up change of basis context */ 3672 if (pcbddc->ChangeOfBasisMatrix) { 3673 PCBDDCChange_ctx change_ctx; 3674 3675 if (!pcbddc->new_global_mat) { 3676 PetscInt global_size,local_size; 3677 3678 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3679 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3680 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3681 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3682 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3683 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3684 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3685 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3686 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3687 } else { 3688 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3689 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3690 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3691 } 3692 if (!pcbddc->user_ChangeOfBasisMatrix) { 3693 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3694 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3695 } else { 3696 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3697 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3698 } 3699 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3700 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3701 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3703 } 3704 3705 /* add pressure dofs to set of primal nodes for numbering purposes */ 3706 for (i=0;i<pcbddc->benign_n;i++) { 3707 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3708 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3709 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3710 pcbddc->local_primal_size_cc++; 3711 pcbddc->local_primal_size++; 3712 } 3713 3714 /* check if a new primal space has been introduced (also take into account benign trick) */ 3715 pcbddc->new_primal_space_local = PETSC_TRUE; 3716 if (olocal_primal_size == pcbddc->local_primal_size) { 3717 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3718 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3719 if (!pcbddc->new_primal_space_local) { 3720 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3721 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3722 } 3723 } 3724 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3725 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3726 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3727 3728 /* flush dbg viewer */ 3729 if (pcbddc->dbg_flag) { 3730 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3731 } 3732 3733 /* free workspace */ 3734 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3735 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3736 if (!pcbddc->adaptive_selection) { 3737 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3738 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3739 } else { 3740 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3741 pcbddc->adaptive_constraints_idxs_ptr, 3742 pcbddc->adaptive_constraints_data_ptr, 3743 pcbddc->adaptive_constraints_idxs, 3744 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3745 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3746 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3747 } 3748 PetscFunctionReturn(0); 3749 } 3750 3751 #undef __FUNCT__ 3752 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3753 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3754 { 3755 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3756 PC_IS *pcis = (PC_IS*)pc->data; 3757 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3758 PetscInt ierr,i,vertex_size,N; 3759 PetscViewer viewer=pcbddc->dbg_viewer; 3760 3761 PetscFunctionBegin; 3762 /* Reset previously computed graph */ 3763 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3764 /* Init local Graph struct */ 3765 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3766 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3767 3768 /* Check validity of the csr graph passed in by the user */ 3769 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3770 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3771 } 3772 3773 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3774 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3775 PetscInt *xadj,*adjncy; 3776 PetscInt nvtxs; 3777 PetscBool flg_row=PETSC_FALSE; 3778 3779 if (pcbddc->use_local_adj) { 3780 3781 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3782 if (flg_row) { 3783 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3784 pcbddc->computed_rowadj = PETSC_TRUE; 3785 } 3786 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3787 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3788 IS is_dummy; 3789 ISLocalToGlobalMapping l2gmap_dummy; 3790 PetscInt j,sum; 3791 PetscInt *cxadj,*cadjncy; 3792 const PetscInt *idxs; 3793 PCBDDCGraph graph; 3794 PetscBT is_on_boundary; 3795 3796 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3797 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3798 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3799 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3800 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3801 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3802 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3803 if (flg_row) { 3804 graph->xadj = xadj; 3805 graph->adjncy = adjncy; 3806 } 3807 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3808 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3809 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3810 3811 if (pcbddc->dbg_flag) { 3812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3813 for (i=0;i<graph->ncc;i++) { 3814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3815 } 3816 } 3817 3818 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3819 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3820 for (i=0;i<pcis->n_B;i++) { 3821 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3822 } 3823 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3824 3825 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3826 sum = 0; 3827 for (i=0;i<graph->ncc;i++) { 3828 PetscInt sizecc = 0; 3829 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3830 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3831 sizecc++; 3832 } 3833 } 3834 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3835 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3836 cxadj[graph->queue[j]] = sizecc; 3837 } 3838 } 3839 sum += sizecc*sizecc; 3840 } 3841 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3842 sum = 0; 3843 for (i=0;i<pcis->n;i++) { 3844 PetscInt temp = cxadj[i]; 3845 cxadj[i] = sum; 3846 sum += temp; 3847 } 3848 cxadj[pcis->n] = sum; 3849 for (i=0;i<graph->ncc;i++) { 3850 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3851 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3852 PetscInt k,sizecc = 0; 3853 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3854 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3855 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3856 sizecc++; 3857 } 3858 } 3859 } 3860 } 3861 } 3862 if (sum) { 3863 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3864 } else { 3865 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3866 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3867 } 3868 graph->xadj = 0; 3869 graph->adjncy = 0; 3870 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3871 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3872 } 3873 } 3874 if (pcbddc->dbg_flag) { 3875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3876 } 3877 3878 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3879 vertex_size = 1; 3880 if (pcbddc->user_provided_isfordofs) { 3881 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3882 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3883 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3884 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3885 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3886 } 3887 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3888 pcbddc->n_ISForDofs = 0; 3889 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3890 } 3891 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3892 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3893 } else { 3894 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3895 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3896 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3897 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3898 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3899 } 3900 } 3901 } 3902 3903 /* Setup of Graph */ 3904 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3905 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3906 } 3907 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3908 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3909 } 3910 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3911 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3912 } 3913 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3914 3915 /* attach info on disconnected subdomains if present */ 3916 if (pcbddc->n_local_subs) { 3917 PetscInt *local_subs; 3918 3919 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3920 for (i=0;i<pcbddc->n_local_subs;i++) { 3921 const PetscInt *idxs; 3922 PetscInt nl,j; 3923 3924 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3925 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3926 for (j=0;j<nl;j++) { 3927 local_subs[idxs[j]] = i; 3928 } 3929 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3930 } 3931 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3932 pcbddc->mat_graph->local_subs = local_subs; 3933 } 3934 3935 /* Graph's connected components analysis */ 3936 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3937 3938 /* print some info to stdout */ 3939 if (pcbddc->dbg_flag) { 3940 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3941 } 3942 3943 /* mark topography has done */ 3944 pcbddc->recompute_topography = PETSC_FALSE; 3945 PetscFunctionReturn(0); 3946 } 3947 3948 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3949 #undef __FUNCT__ 3950 #define __FUNCT__ "PCBDDCSubsetNumbering" 3951 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3952 { 3953 PetscSF sf; 3954 PetscLayout map; 3955 const PetscInt *idxs; 3956 PetscInt *leaf_data,*root_data,*gidxs; 3957 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3958 PetscInt n_n,nlocals,start,first_index; 3959 PetscMPIInt commsize; 3960 PetscBool first_found; 3961 PetscErrorCode ierr; 3962 3963 PetscFunctionBegin; 3964 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3965 if (subset_mult) { 3966 PetscCheckSameComm(subset,1,subset_mult,2); 3967 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3968 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3969 } 3970 /* create workspace layout for computing global indices of subset */ 3971 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3972 lbounds[0] = lbounds[1] = 0; 3973 for (i=0;i<n;i++) { 3974 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3975 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3976 } 3977 lbounds[0] = -lbounds[0]; 3978 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3979 gbounds[0] = -gbounds[0]; 3980 N = gbounds[1] - gbounds[0] + 1; 3981 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3982 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3983 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3984 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3985 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3986 3987 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3988 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3989 if (subset_mult) { 3990 const PetscInt* idxs_mult; 3991 3992 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3993 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3994 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3995 } else { 3996 for (i=0;i<n;i++) leaf_data[i] = 1; 3997 } 3998 /* local size of new subset */ 3999 n_n = 0; 4000 for (i=0;i<n;i++) n_n += leaf_data[i]; 4001 4002 /* global indexes in layout */ 4003 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4004 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4005 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4006 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4007 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4008 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4009 4010 /* reduce from leaves to roots */ 4011 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4012 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4013 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4014 4015 /* count indexes in local part of layout */ 4016 nlocals = 0; 4017 first_index = -1; 4018 first_found = PETSC_FALSE; 4019 for (i=0;i<Nl;i++) { 4020 if (!first_found && root_data[i]) { 4021 first_found = PETSC_TRUE; 4022 first_index = i; 4023 } 4024 nlocals += root_data[i]; 4025 } 4026 4027 /* cumulative of number of indexes and size of subset without holes */ 4028 #if defined(PETSC_HAVE_MPI_EXSCAN) 4029 start = 0; 4030 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4031 #else 4032 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4033 start = start-nlocals; 4034 #endif 4035 4036 if (N_n) { /* compute total size of new subset if requested */ 4037 *N_n = start + nlocals; 4038 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4039 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4040 } 4041 4042 /* adapt root data with cumulative */ 4043 if (first_found) { 4044 PetscInt old_index; 4045 4046 root_data[first_index] += start; 4047 old_index = first_index; 4048 for (i=first_index+1;i<Nl;i++) { 4049 if (root_data[i]) { 4050 root_data[i] += root_data[old_index]; 4051 old_index = i; 4052 } 4053 } 4054 } 4055 4056 /* from roots to leaves */ 4057 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4058 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4059 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4060 4061 /* create new IS with global indexes without holes */ 4062 if (subset_mult) { 4063 const PetscInt* idxs_mult; 4064 PetscInt cum; 4065 4066 cum = 0; 4067 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4068 for (i=0;i<n;i++) { 4069 PetscInt j; 4070 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4071 } 4072 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4073 } else { 4074 for (i=0;i<n;i++) { 4075 gidxs[i] = leaf_data[i]-1; 4076 } 4077 } 4078 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4079 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4080 PetscFunctionReturn(0); 4081 } 4082 4083 #undef __FUNCT__ 4084 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4085 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4086 { 4087 PetscInt i,j; 4088 PetscScalar *alphas; 4089 PetscErrorCode ierr; 4090 4091 PetscFunctionBegin; 4092 /* this implements stabilized Gram-Schmidt */ 4093 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4094 for (i=0;i<n;i++) { 4095 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4096 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4097 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4098 } 4099 ierr = PetscFree(alphas);CHKERRQ(ierr); 4100 PetscFunctionReturn(0); 4101 } 4102 4103 #undef __FUNCT__ 4104 #define __FUNCT__ "MatISGetSubassemblingPattern" 4105 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4106 { 4107 IS ranks_send_to; 4108 PetscInt n_neighs,*neighs,*n_shared,**shared; 4109 PetscMPIInt size,rank,color; 4110 PetscInt *xadj,*adjncy; 4111 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4112 PetscInt i,local_size,threshold=0; 4113 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4114 PetscSubcomm subcomm; 4115 PetscErrorCode ierr; 4116 4117 PetscFunctionBegin; 4118 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4119 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4120 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4121 4122 /* Get info on mapping */ 4123 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4124 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4125 4126 /* build local CSR graph of subdomains' connectivity */ 4127 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4128 xadj[0] = 0; 4129 xadj[1] = PetscMax(n_neighs-1,0); 4130 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4131 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4132 4133 if (threshold) { 4134 PetscInt xadj_count = 0; 4135 for (i=1;i<n_neighs;i++) { 4136 if (n_shared[i] > threshold) { 4137 adjncy[xadj_count] = neighs[i]; 4138 adjncy_wgt[xadj_count] = n_shared[i]; 4139 xadj_count++; 4140 } 4141 } 4142 xadj[1] = xadj_count; 4143 } else { 4144 if (xadj[1]) { 4145 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4146 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4147 } 4148 } 4149 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4150 if (use_square) { 4151 for (i=0;i<xadj[1];i++) { 4152 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4153 } 4154 } 4155 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4156 4157 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4158 4159 /* 4160 Restrict work on active processes only. 4161 */ 4162 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4163 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4164 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4165 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4166 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4167 if (color) { 4168 ierr = PetscFree(xadj);CHKERRQ(ierr); 4169 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4170 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4171 } else { 4172 Mat subdomain_adj; 4173 IS new_ranks,new_ranks_contig; 4174 MatPartitioning partitioner; 4175 PetscInt prank,rstart=0,rend=0; 4176 PetscInt *is_indices,*oldranks; 4177 PetscBool aggregate; 4178 4179 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4180 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4181 prank = rank; 4182 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4183 /* 4184 for (i=0;i<size;i++) { 4185 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4186 } 4187 */ 4188 for (i=0;i<xadj[1];i++) { 4189 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4190 } 4191 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4192 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4193 if (aggregate) { 4194 PetscInt lrows,row,ncols,*cols; 4195 PetscMPIInt nrank; 4196 PetscScalar *vals; 4197 4198 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4199 lrows = 0; 4200 if (nrank<redprocs) { 4201 lrows = size/redprocs; 4202 if (nrank<size%redprocs) lrows++; 4203 } 4204 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4205 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4206 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4207 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4208 row = nrank; 4209 ncols = xadj[1]-xadj[0]; 4210 cols = adjncy; 4211 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4212 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4213 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4214 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4215 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4216 ierr = PetscFree(xadj);CHKERRQ(ierr); 4217 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4218 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4219 ierr = PetscFree(vals);CHKERRQ(ierr); 4220 } else { 4221 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4222 } 4223 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4224 4225 /* Partition */ 4226 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4227 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4228 if (use_vwgt) { 4229 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4230 v_wgt[0] = local_size; 4231 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4232 } 4233 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4234 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4235 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4236 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4237 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4238 4239 /* renumber new_ranks to avoid "holes" in new set of processors */ 4240 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4241 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4242 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4243 if (!redprocs) { 4244 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4245 } else { 4246 PetscInt idxs[1]; 4247 PetscMPIInt tag; 4248 MPI_Request *reqs; 4249 4250 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4251 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4252 for (i=rstart;i<rend;i++) { 4253 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4254 } 4255 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4256 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4257 ierr = PetscFree(reqs);CHKERRQ(ierr); 4258 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4259 } 4260 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4261 /* clean up */ 4262 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4263 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4264 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4265 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4266 } 4267 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4268 4269 /* assemble parallel IS for sends */ 4270 i = 1; 4271 if (color) i=0; 4272 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4273 /* get back IS */ 4274 *is_sends = ranks_send_to; 4275 PetscFunctionReturn(0); 4276 } 4277 4278 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4279 4280 #undef __FUNCT__ 4281 #define __FUNCT__ "MatISSubassemble" 4282 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4283 { 4284 Mat local_mat; 4285 IS is_sends_internal; 4286 PetscInt rows,cols,new_local_rows; 4287 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4288 PetscBool ismatis,isdense,newisdense,destroy_mat; 4289 ISLocalToGlobalMapping l2gmap; 4290 PetscInt* l2gmap_indices; 4291 const PetscInt* is_indices; 4292 MatType new_local_type; 4293 /* buffers */ 4294 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4295 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4296 PetscInt *recv_buffer_idxs_local; 4297 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4298 /* MPI */ 4299 MPI_Comm comm,comm_n; 4300 PetscSubcomm subcomm; 4301 PetscMPIInt n_sends,n_recvs,commsize; 4302 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4303 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4304 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4305 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4306 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4307 PetscErrorCode ierr; 4308 4309 PetscFunctionBegin; 4310 /* TODO: add missing checks */ 4311 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4312 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4313 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4314 PetscValidLogicalCollectiveInt(mat,nis,7); 4315 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4316 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4317 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4318 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4319 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4320 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4321 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4322 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4323 PetscInt mrows,mcols,mnrows,mncols; 4324 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4325 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4326 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4327 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4328 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4329 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4330 } 4331 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4332 PetscValidLogicalCollectiveInt(mat,bs,0); 4333 /* prepare IS for sending if not provided */ 4334 if (!is_sends) { 4335 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4336 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4337 } else { 4338 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4339 is_sends_internal = is_sends; 4340 } 4341 4342 /* get comm */ 4343 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4344 4345 /* compute number of sends */ 4346 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4347 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4348 4349 /* compute number of receives */ 4350 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4351 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4352 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4353 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4354 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4355 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4356 ierr = PetscFree(iflags);CHKERRQ(ierr); 4357 4358 /* restrict comm if requested */ 4359 subcomm = 0; 4360 destroy_mat = PETSC_FALSE; 4361 if (restrict_comm) { 4362 PetscMPIInt color,subcommsize; 4363 4364 color = 0; 4365 if (restrict_full) { 4366 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4367 } else { 4368 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4369 } 4370 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4371 subcommsize = commsize - subcommsize; 4372 /* check if reuse has been requested */ 4373 if (reuse == MAT_REUSE_MATRIX) { 4374 if (*mat_n) { 4375 PetscMPIInt subcommsize2; 4376 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4377 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4378 comm_n = PetscObjectComm((PetscObject)*mat_n); 4379 } else { 4380 comm_n = PETSC_COMM_SELF; 4381 } 4382 } else { /* MAT_INITIAL_MATRIX */ 4383 PetscMPIInt rank; 4384 4385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4386 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4387 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4388 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4389 comm_n = PetscSubcommChild(subcomm); 4390 } 4391 /* flag to destroy *mat_n if not significative */ 4392 if (color) destroy_mat = PETSC_TRUE; 4393 } else { 4394 comm_n = comm; 4395 } 4396 4397 /* prepare send/receive buffers */ 4398 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4399 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4400 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4401 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4402 if (nis) { 4403 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4404 } 4405 4406 /* Get data from local matrices */ 4407 if (!isdense) { 4408 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4409 /* TODO: See below some guidelines on how to prepare the local buffers */ 4410 /* 4411 send_buffer_vals should contain the raw values of the local matrix 4412 send_buffer_idxs should contain: 4413 - MatType_PRIVATE type 4414 - PetscInt size_of_l2gmap 4415 - PetscInt global_row_indices[size_of_l2gmap] 4416 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4417 */ 4418 } else { 4419 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4420 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4421 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4422 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4423 send_buffer_idxs[1] = i; 4424 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4425 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4426 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4427 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4428 for (i=0;i<n_sends;i++) { 4429 ilengths_vals[is_indices[i]] = len*len; 4430 ilengths_idxs[is_indices[i]] = len+2; 4431 } 4432 } 4433 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4434 /* additional is (if any) */ 4435 if (nis) { 4436 PetscMPIInt psum; 4437 PetscInt j; 4438 for (j=0,psum=0;j<nis;j++) { 4439 PetscInt plen; 4440 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4441 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4442 psum += len+1; /* indices + lenght */ 4443 } 4444 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4445 for (j=0,psum=0;j<nis;j++) { 4446 PetscInt plen; 4447 const PetscInt *is_array_idxs; 4448 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4449 send_buffer_idxs_is[psum] = plen; 4450 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4451 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4452 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4453 psum += plen+1; /* indices + lenght */ 4454 } 4455 for (i=0;i<n_sends;i++) { 4456 ilengths_idxs_is[is_indices[i]] = psum; 4457 } 4458 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4459 } 4460 4461 buf_size_idxs = 0; 4462 buf_size_vals = 0; 4463 buf_size_idxs_is = 0; 4464 for (i=0;i<n_recvs;i++) { 4465 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4466 buf_size_vals += (PetscInt)olengths_vals[i]; 4467 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4468 } 4469 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4470 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4471 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4472 4473 /* get new tags for clean communications */ 4474 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4475 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4476 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4477 4478 /* allocate for requests */ 4479 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4480 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4481 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4482 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4483 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4484 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4485 4486 /* communications */ 4487 ptr_idxs = recv_buffer_idxs; 4488 ptr_vals = recv_buffer_vals; 4489 ptr_idxs_is = recv_buffer_idxs_is; 4490 for (i=0;i<n_recvs;i++) { 4491 source_dest = onodes[i]; 4492 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4493 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4494 ptr_idxs += olengths_idxs[i]; 4495 ptr_vals += olengths_vals[i]; 4496 if (nis) { 4497 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4498 ptr_idxs_is += olengths_idxs_is[i]; 4499 } 4500 } 4501 for (i=0;i<n_sends;i++) { 4502 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4503 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4504 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4505 if (nis) { 4506 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4507 } 4508 } 4509 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4510 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4511 4512 /* assemble new l2g map */ 4513 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4514 ptr_idxs = recv_buffer_idxs; 4515 new_local_rows = 0; 4516 for (i=0;i<n_recvs;i++) { 4517 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4518 ptr_idxs += olengths_idxs[i]; 4519 } 4520 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4521 ptr_idxs = recv_buffer_idxs; 4522 new_local_rows = 0; 4523 for (i=0;i<n_recvs;i++) { 4524 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4525 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4526 ptr_idxs += olengths_idxs[i]; 4527 } 4528 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4529 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4530 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4531 4532 /* infer new local matrix type from received local matrices type */ 4533 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4534 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4535 if (n_recvs) { 4536 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4537 ptr_idxs = recv_buffer_idxs; 4538 for (i=0;i<n_recvs;i++) { 4539 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4540 new_local_type_private = MATAIJ_PRIVATE; 4541 break; 4542 } 4543 ptr_idxs += olengths_idxs[i]; 4544 } 4545 switch (new_local_type_private) { 4546 case MATDENSE_PRIVATE: 4547 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4548 new_local_type = MATSEQAIJ; 4549 bs = 1; 4550 } else { /* if I receive only 1 dense matrix */ 4551 new_local_type = MATSEQDENSE; 4552 bs = 1; 4553 } 4554 break; 4555 case MATAIJ_PRIVATE: 4556 new_local_type = MATSEQAIJ; 4557 bs = 1; 4558 break; 4559 case MATBAIJ_PRIVATE: 4560 new_local_type = MATSEQBAIJ; 4561 break; 4562 case MATSBAIJ_PRIVATE: 4563 new_local_type = MATSEQSBAIJ; 4564 break; 4565 default: 4566 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4567 break; 4568 } 4569 } else { /* by default, new_local_type is seqdense */ 4570 new_local_type = MATSEQDENSE; 4571 bs = 1; 4572 } 4573 4574 /* create MATIS object if needed */ 4575 if (reuse == MAT_INITIAL_MATRIX) { 4576 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4577 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4578 } else { 4579 /* it also destroys the local matrices */ 4580 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4581 } 4582 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4583 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4584 4585 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4586 4587 /* Global to local map of received indices */ 4588 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4589 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4590 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4591 4592 /* restore attributes -> type of incoming data and its size */ 4593 buf_size_idxs = 0; 4594 for (i=0;i<n_recvs;i++) { 4595 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4596 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4597 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4598 } 4599 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4600 4601 /* set preallocation */ 4602 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4603 if (!newisdense) { 4604 PetscInt *new_local_nnz=0; 4605 4606 ptr_vals = recv_buffer_vals; 4607 ptr_idxs = recv_buffer_idxs_local; 4608 if (n_recvs) { 4609 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4610 } 4611 for (i=0;i<n_recvs;i++) { 4612 PetscInt j; 4613 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4614 for (j=0;j<*(ptr_idxs+1);j++) { 4615 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4616 } 4617 } else { 4618 /* TODO */ 4619 } 4620 ptr_idxs += olengths_idxs[i]; 4621 } 4622 if (new_local_nnz) { 4623 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4624 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4625 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4626 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4627 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4628 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4629 } else { 4630 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4631 } 4632 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4633 } else { 4634 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4635 } 4636 4637 /* set values */ 4638 ptr_vals = recv_buffer_vals; 4639 ptr_idxs = recv_buffer_idxs_local; 4640 for (i=0;i<n_recvs;i++) { 4641 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4642 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4643 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4644 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4645 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4646 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4647 } else { 4648 /* TODO */ 4649 } 4650 ptr_idxs += olengths_idxs[i]; 4651 ptr_vals += olengths_vals[i]; 4652 } 4653 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4654 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4655 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4656 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4657 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4658 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4659 4660 #if 0 4661 if (!restrict_comm) { /* check */ 4662 Vec lvec,rvec; 4663 PetscReal infty_error; 4664 4665 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4666 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4667 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4668 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4669 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4670 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4671 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4672 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4673 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4674 } 4675 #endif 4676 4677 /* assemble new additional is (if any) */ 4678 if (nis) { 4679 PetscInt **temp_idxs,*count_is,j,psum; 4680 4681 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4682 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4683 ptr_idxs = recv_buffer_idxs_is; 4684 psum = 0; 4685 for (i=0;i<n_recvs;i++) { 4686 for (j=0;j<nis;j++) { 4687 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4688 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4689 psum += plen; 4690 ptr_idxs += plen+1; /* shift pointer to received data */ 4691 } 4692 } 4693 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4694 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4695 for (i=1;i<nis;i++) { 4696 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4697 } 4698 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4699 ptr_idxs = recv_buffer_idxs_is; 4700 for (i=0;i<n_recvs;i++) { 4701 for (j=0;j<nis;j++) { 4702 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4703 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4704 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4705 ptr_idxs += plen+1; /* shift pointer to received data */ 4706 } 4707 } 4708 for (i=0;i<nis;i++) { 4709 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4710 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4711 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4712 } 4713 ierr = PetscFree(count_is);CHKERRQ(ierr); 4714 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4715 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4716 } 4717 /* free workspace */ 4718 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4719 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4720 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4721 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4722 if (isdense) { 4723 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4724 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4725 } else { 4726 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4727 } 4728 if (nis) { 4729 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4730 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4731 } 4732 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4733 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4734 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4735 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4736 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4737 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4738 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4739 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4740 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4741 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4742 ierr = PetscFree(onodes);CHKERRQ(ierr); 4743 if (nis) { 4744 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4745 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4746 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4747 } 4748 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4749 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4750 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4751 for (i=0;i<nis;i++) { 4752 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4753 } 4754 *mat_n = NULL; 4755 } 4756 PetscFunctionReturn(0); 4757 } 4758 4759 /* temporary hack into ksp private data structure */ 4760 #include <petsc/private/kspimpl.h> 4761 4762 #undef __FUNCT__ 4763 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4764 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4765 { 4766 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4767 PC_IS *pcis = (PC_IS*)pc->data; 4768 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4769 MatNullSpace CoarseNullSpace=NULL; 4770 ISLocalToGlobalMapping coarse_islg; 4771 IS coarse_is,*isarray; 4772 PetscInt i,im_active=-1,active_procs=-1; 4773 PetscInt nis,nisdofs,nisneu,nisvert; 4774 PC pc_temp; 4775 PCType coarse_pc_type; 4776 KSPType coarse_ksp_type; 4777 PetscBool multilevel_requested,multilevel_allowed; 4778 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4779 Mat t_coarse_mat_is; 4780 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4781 PetscMPIInt all_procs; 4782 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4783 PetscBool compute_vecs = PETSC_FALSE; 4784 PetscScalar *array; 4785 PetscErrorCode ierr; 4786 4787 PetscFunctionBegin; 4788 /* Assign global numbering to coarse dofs */ 4789 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4790 PetscInt ocoarse_size; 4791 compute_vecs = PETSC_TRUE; 4792 ocoarse_size = pcbddc->coarse_size; 4793 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4794 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4795 /* see if we can avoid some work */ 4796 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4797 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4798 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4799 PC pc; 4800 PetscBool isbddc; 4801 4802 /* temporary workaround since PCBDDC does not have a reset method so far */ 4803 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4804 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4805 if (isbddc) { 4806 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4807 } 4808 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4809 coarse_reuse = PETSC_FALSE; 4810 } else { /* we can safely reuse already computed coarse matrix */ 4811 coarse_reuse = PETSC_TRUE; 4812 } 4813 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4814 coarse_reuse = PETSC_FALSE; 4815 } 4816 /* reset any subassembling information */ 4817 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4818 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4819 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4820 coarse_reuse = PETSC_TRUE; 4821 } 4822 4823 /* count "active" (i.e. with positive local size) and "void" processes */ 4824 im_active = !!(pcis->n); 4825 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4826 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4827 void_procs = all_procs-active_procs; 4828 csin_type_simple = PETSC_TRUE; 4829 redist = PETSC_FALSE; 4830 if (pcbddc->current_level && void_procs) { 4831 csin_ml = PETSC_TRUE; 4832 ncoarse_ml = void_procs; 4833 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4834 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4835 csin_ds = PETSC_TRUE; 4836 ncoarse_ds = pcbddc->redistribute_coarse; 4837 redist = PETSC_TRUE; 4838 } else { 4839 csin_ds = PETSC_TRUE; 4840 ncoarse_ds = active_procs; 4841 redist = PETSC_TRUE; 4842 } 4843 } else { 4844 csin_ml = PETSC_FALSE; 4845 ncoarse_ml = all_procs; 4846 if (void_procs) { 4847 csin_ds = PETSC_TRUE; 4848 ncoarse_ds = void_procs; 4849 csin_type_simple = PETSC_FALSE; 4850 } else { 4851 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4852 csin_ds = PETSC_TRUE; 4853 ncoarse_ds = pcbddc->redistribute_coarse; 4854 redist = PETSC_TRUE; 4855 } else { 4856 csin_ds = PETSC_FALSE; 4857 ncoarse_ds = all_procs; 4858 } 4859 } 4860 } 4861 4862 /* 4863 test if we can go multilevel: three conditions must be satisfied: 4864 - we have not exceeded the number of levels requested 4865 - we can actually subassemble the active processes 4866 - we can find a suitable number of MPI processes where we can place the subassembled problem 4867 */ 4868 multilevel_allowed = PETSC_FALSE; 4869 multilevel_requested = PETSC_FALSE; 4870 if (pcbddc->current_level < pcbddc->max_levels) { 4871 multilevel_requested = PETSC_TRUE; 4872 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4873 multilevel_allowed = PETSC_FALSE; 4874 } else { 4875 multilevel_allowed = PETSC_TRUE; 4876 } 4877 } 4878 /* determine number of process partecipating to coarse solver */ 4879 if (multilevel_allowed) { 4880 ncoarse = ncoarse_ml; 4881 csin = csin_ml; 4882 redist = PETSC_FALSE; 4883 } else { 4884 ncoarse = ncoarse_ds; 4885 csin = csin_ds; 4886 } 4887 4888 /* creates temporary l2gmap and IS for coarse indexes */ 4889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4890 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4891 4892 /* creates temporary MATIS object for coarse matrix */ 4893 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4894 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4895 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4896 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4897 #if 0 4898 { 4899 PetscViewer viewer; 4900 char filename[256]; 4901 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4902 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4903 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4904 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4905 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4906 } 4907 #endif 4908 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4909 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4910 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4911 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4912 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4913 4914 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4915 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4916 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4917 const PetscInt *idxs; 4918 ISLocalToGlobalMapping tmap; 4919 4920 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4921 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4922 /* allocate space for temporary storage */ 4923 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4924 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4925 /* allocate for IS array */ 4926 nisdofs = pcbddc->n_ISForDofsLocal; 4927 nisneu = !!pcbddc->NeumannBoundariesLocal; 4928 nisvert = 0; 4929 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4930 nisvert = 1; 4931 } 4932 nis = nisdofs + nisneu + nisvert; 4933 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4934 /* dofs splitting */ 4935 for (i=0;i<nisdofs;i++) { 4936 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4937 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4938 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4939 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4940 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4941 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4943 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4944 } 4945 /* neumann boundaries */ 4946 if (pcbddc->NeumannBoundariesLocal) { 4947 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4948 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4949 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4950 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4951 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4952 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4953 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4954 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4955 } 4956 /* primal vertices (benign) */ 4957 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4958 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4959 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4960 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4961 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4962 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4963 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4964 } 4965 /* free memory */ 4966 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4967 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4968 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4969 } else { 4970 nis = 0; 4971 nisdofs = 0; 4972 nisneu = 0; 4973 nisvert = 0; 4974 isarray = NULL; 4975 } 4976 /* destroy no longer needed map */ 4977 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4978 4979 /* restrict on coarse candidates (if needed) */ 4980 coarse_mat_is = NULL; 4981 if (csin) { 4982 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4983 if (redist) { 4984 PetscMPIInt rank; 4985 PetscInt spc,n_spc_p1,dest[1],destsize; 4986 4987 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4988 spc = active_procs/ncoarse; 4989 n_spc_p1 = active_procs%ncoarse; 4990 if (im_active) { 4991 destsize = 1; 4992 if (rank > n_spc_p1*(spc+1)-1) { 4993 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4994 } else { 4995 dest[0] = rank/(spc+1); 4996 } 4997 } else { 4998 destsize = 0; 4999 } 5000 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5001 } else if (csin_type_simple) { 5002 PetscMPIInt rank; 5003 PetscInt issize,isidx; 5004 5005 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5006 if (im_active) { 5007 issize = 1; 5008 isidx = (PetscInt)rank; 5009 } else { 5010 issize = 0; 5011 isidx = -1; 5012 } 5013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5014 } else { /* get a suitable subassembling pattern from MATIS code */ 5015 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5016 } 5017 5018 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5019 if (!redist || ncoarse <= void_procs) { 5020 PetscInt ncoarse_cand,tissize,*nisindices; 5021 PetscInt *coarse_candidates; 5022 const PetscInt* tisindices; 5023 5024 /* get coarse candidates' ranks in pc communicator */ 5025 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5026 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5027 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5028 if (!coarse_candidates[i]) { 5029 coarse_candidates[ncoarse_cand++]=i; 5030 } 5031 } 5032 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5033 5034 5035 if (pcbddc->dbg_flag) { 5036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5038 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5039 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5040 for (i=0;i<ncoarse_cand;i++) { 5041 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5042 } 5043 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5045 } 5046 /* shift the pattern on coarse candidates */ 5047 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5048 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5049 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5050 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5051 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5052 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5053 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5054 } 5055 if (pcbddc->dbg_flag) { 5056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5057 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5058 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5059 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5060 } 5061 } 5062 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5063 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5064 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5065 } else { /* this is the last level, so use just receiving processes in subcomm */ 5066 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5067 } 5068 } else { 5069 if (pcbddc->dbg_flag) { 5070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5071 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5072 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5073 } 5074 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5075 coarse_mat_is = t_coarse_mat_is; 5076 } 5077 5078 /* create local to global scatters for coarse problem */ 5079 if (compute_vecs) { 5080 PetscInt lrows; 5081 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5082 if (coarse_mat_is) { 5083 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5084 } else { 5085 lrows = 0; 5086 } 5087 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5088 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5089 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5090 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5091 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5092 } 5093 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5094 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5095 5096 /* set defaults for coarse KSP and PC */ 5097 if (multilevel_allowed) { 5098 coarse_ksp_type = KSPRICHARDSON; 5099 coarse_pc_type = PCBDDC; 5100 } else { 5101 coarse_ksp_type = KSPPREONLY; 5102 coarse_pc_type = PCREDUNDANT; 5103 } 5104 5105 /* print some info if requested */ 5106 if (pcbddc->dbg_flag) { 5107 if (!multilevel_allowed) { 5108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5109 if (multilevel_requested) { 5110 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5111 } else if (pcbddc->max_levels) { 5112 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5113 } 5114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5115 } 5116 } 5117 5118 /* create the coarse KSP object only once with defaults */ 5119 if (coarse_mat_is) { 5120 MatReuse coarse_mat_reuse; 5121 PetscViewer dbg_viewer = NULL; 5122 if (pcbddc->dbg_flag) { 5123 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5124 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5125 } 5126 if (!pcbddc->coarse_ksp) { 5127 char prefix[256],str_level[16]; 5128 size_t len; 5129 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5130 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5131 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5132 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5133 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5134 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5135 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5136 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5137 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5138 /* prefix */ 5139 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5140 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5141 if (!pcbddc->current_level) { 5142 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5143 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5144 } else { 5145 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5146 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5147 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5148 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5149 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5150 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5151 } 5152 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5153 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5154 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5155 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5156 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5157 /* allow user customization */ 5158 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5159 } 5160 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5161 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5162 if (nisdofs) { 5163 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5164 for (i=0;i<nisdofs;i++) { 5165 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5166 } 5167 } 5168 if (nisneu) { 5169 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5170 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5171 } 5172 if (nisvert) { 5173 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5174 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5175 } 5176 5177 /* get some info after set from options */ 5178 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5179 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5180 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5181 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5182 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5183 isbddc = PETSC_FALSE; 5184 } 5185 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5186 if (isredundant) { 5187 KSP inner_ksp; 5188 PC inner_pc; 5189 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5190 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5191 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5192 } 5193 5194 /* assemble coarse matrix */ 5195 if (coarse_reuse) { 5196 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5197 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5198 coarse_mat_reuse = MAT_REUSE_MATRIX; 5199 } else { 5200 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5201 } 5202 if (isbddc || isnn) { 5203 if (pcbddc->coarsening_ratio > 1) { 5204 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5205 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5206 if (pcbddc->dbg_flag) { 5207 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5208 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5209 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5210 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5211 } 5212 } 5213 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5214 } else { 5215 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5216 coarse_mat = coarse_mat_is; 5217 } 5218 } else { 5219 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5220 } 5221 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5222 5223 /* propagate symmetry info of coarse matrix */ 5224 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5225 if (pc->pmat->symmetric_set) { 5226 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5227 } 5228 if (pc->pmat->hermitian_set) { 5229 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5230 } 5231 if (pc->pmat->spd_set) { 5232 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5233 } 5234 /* set operators */ 5235 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5236 if (pcbddc->dbg_flag) { 5237 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5238 } 5239 } else { /* processes non partecipating to coarse solver (if any) */ 5240 coarse_mat = 0; 5241 } 5242 ierr = PetscFree(isarray);CHKERRQ(ierr); 5243 #if 0 5244 { 5245 PetscViewer viewer; 5246 char filename[256]; 5247 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5248 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5249 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5250 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5251 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5252 } 5253 #endif 5254 5255 /* Compute coarse null space (special handling by BDDC only) */ 5256 #if 0 5257 if (pcbddc->NullSpace) { 5258 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5259 } 5260 #endif 5261 /* hack */ 5262 if (pcbddc->coarse_ksp) { 5263 Vec crhs,csol; 5264 5265 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5266 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5267 if (!csol) { 5268 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5269 } 5270 if (!crhs) { 5271 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5272 } 5273 } 5274 5275 /* compute null space for coarse solver if the benign trick has been requested */ 5276 if (pcbddc->benign_null) { 5277 5278 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5279 for (i=0;i<pcbddc->benign_n;i++) { 5280 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5281 } 5282 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5283 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5284 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5285 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5286 if (coarse_mat) { 5287 Vec nullv; 5288 PetscScalar *array,*array2; 5289 PetscInt nl; 5290 5291 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5292 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5293 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5294 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5295 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5296 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5297 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5298 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5299 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5300 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5301 } 5302 } 5303 5304 if (pcbddc->coarse_ksp) { 5305 PetscBool ispreonly; 5306 5307 if (CoarseNullSpace) { 5308 PetscBool isnull; 5309 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5310 if (isnull) { 5311 if (isbddc && !pcbddc->benign_saddle_point) { 5312 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5313 } else { 5314 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5315 } 5316 } else { 5317 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5318 } 5319 } 5320 /* setup coarse ksp */ 5321 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5322 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5323 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5324 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5325 KSP check_ksp; 5326 KSPType check_ksp_type; 5327 PC check_pc; 5328 Vec check_vec,coarse_vec; 5329 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5330 PetscInt its; 5331 PetscBool compute_eigs; 5332 PetscReal *eigs_r,*eigs_c; 5333 PetscInt neigs; 5334 const char *prefix; 5335 5336 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5337 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5338 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5339 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5340 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5341 if (ispreonly) { 5342 check_ksp_type = KSPPREONLY; 5343 compute_eigs = PETSC_FALSE; 5344 } else { 5345 check_ksp_type = KSPGMRES; 5346 compute_eigs = PETSC_TRUE; 5347 } 5348 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5349 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5350 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5351 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5352 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5353 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5354 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5355 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5356 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5357 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5358 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5359 /* create random vec */ 5360 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5361 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5362 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5363 if (CoarseNullSpace) { 5364 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5365 } 5366 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5367 /* solve coarse problem */ 5368 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5369 if (CoarseNullSpace) { 5370 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5371 } 5372 /* set eigenvalue estimation if preonly has not been requested */ 5373 if (compute_eigs) { 5374 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5375 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5376 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5377 lambda_max = eigs_r[neigs-1]; 5378 lambda_min = eigs_r[0]; 5379 if (pcbddc->use_coarse_estimates) { 5380 if (lambda_max>lambda_min) { 5381 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5382 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5383 } 5384 } 5385 } 5386 5387 /* check coarse problem residual error */ 5388 if (pcbddc->dbg_flag) { 5389 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5390 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5391 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5392 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5393 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5394 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5395 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5396 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5397 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5398 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5399 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5400 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5401 if (CoarseNullSpace) { 5402 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5403 } 5404 if (compute_eigs) { 5405 PetscReal lambda_max_s,lambda_min_s; 5406 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5407 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5408 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5410 for (i=0;i<neigs;i++) { 5411 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5412 } 5413 } 5414 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5415 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5416 } 5417 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5418 if (compute_eigs) { 5419 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5420 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5421 } 5422 } 5423 } 5424 /* print additional info */ 5425 if (pcbddc->dbg_flag) { 5426 /* waits until all processes reaches this point */ 5427 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5428 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5429 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5430 } 5431 5432 /* free memory */ 5433 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5434 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5435 PetscFunctionReturn(0); 5436 } 5437 5438 #undef __FUNCT__ 5439 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5440 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5441 { 5442 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5443 PC_IS* pcis = (PC_IS*)pc->data; 5444 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5445 IS subset,subset_mult,subset_n; 5446 PetscInt local_size,coarse_size=0; 5447 PetscInt *local_primal_indices=NULL; 5448 const PetscInt *t_local_primal_indices; 5449 PetscErrorCode ierr; 5450 5451 PetscFunctionBegin; 5452 /* Compute global number of coarse dofs */ 5453 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5454 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5455 } 5456 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5457 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5458 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5459 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5460 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5461 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5462 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5463 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5464 if (local_size != pcbddc->local_primal_size) { 5465 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5466 } 5467 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5468 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5469 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5470 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5471 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5472 5473 /* check numbering */ 5474 if (pcbddc->dbg_flag) { 5475 PetscScalar coarsesum,*array,*array2; 5476 PetscInt i; 5477 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5478 5479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5480 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5482 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5483 /* counter */ 5484 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5485 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5486 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5487 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5488 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5489 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5490 5491 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5492 for (i=0;i<pcbddc->local_primal_size;i++) { 5493 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5494 } 5495 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5496 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5497 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5498 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5499 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5500 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5501 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5502 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5503 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5504 for (i=0;i<pcis->n;i++) { 5505 if (array[i] != 0.0 && array[i] != array2[i]) { 5506 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5507 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5508 set_error = PETSC_TRUE; 5509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5510 } 5511 } 5512 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5513 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5514 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5515 for (i=0;i<pcis->n;i++) { 5516 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5517 } 5518 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5519 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5520 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5521 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5522 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5523 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5524 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5525 PetscInt *gidxs; 5526 5527 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5528 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5529 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5532 for (i=0;i<pcbddc->local_primal_size;i++) { 5533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5534 } 5535 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5536 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5537 } 5538 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5539 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5540 } 5541 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5542 /* get back data */ 5543 *coarse_size_n = coarse_size; 5544 *local_primal_indices_n = local_primal_indices; 5545 PetscFunctionReturn(0); 5546 } 5547 5548 #undef __FUNCT__ 5549 #define __FUNCT__ "PCBDDCGlobalToLocal" 5550 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5551 { 5552 IS localis_t; 5553 PetscInt i,lsize,*idxs,n; 5554 PetscScalar *vals; 5555 PetscErrorCode ierr; 5556 5557 PetscFunctionBegin; 5558 /* get indices in local ordering exploiting local to global map */ 5559 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5560 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5561 for (i=0;i<lsize;i++) vals[i] = 1.0; 5562 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5563 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5564 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5565 if (idxs) { /* multilevel guard */ 5566 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5567 } 5568 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5569 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5570 ierr = PetscFree(vals);CHKERRQ(ierr); 5571 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5572 /* now compute set in local ordering */ 5573 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5574 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5575 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5576 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5577 for (i=0,lsize=0;i<n;i++) { 5578 if (PetscRealPart(vals[i]) > 0.5) { 5579 lsize++; 5580 } 5581 } 5582 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5583 for (i=0,lsize=0;i<n;i++) { 5584 if (PetscRealPart(vals[i]) > 0.5) { 5585 idxs[lsize++] = i; 5586 } 5587 } 5588 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5589 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5590 *localis = localis_t; 5591 PetscFunctionReturn(0); 5592 } 5593 5594 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5595 #undef __FUNCT__ 5596 #define __FUNCT__ "PCBDDCMatMult_Private" 5597 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5598 { 5599 PCBDDCChange_ctx change_ctx; 5600 PetscErrorCode ierr; 5601 5602 PetscFunctionBegin; 5603 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5604 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5605 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5606 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5607 PetscFunctionReturn(0); 5608 } 5609 5610 #undef __FUNCT__ 5611 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5612 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5613 { 5614 PCBDDCChange_ctx change_ctx; 5615 PetscErrorCode ierr; 5616 5617 PetscFunctionBegin; 5618 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5619 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5620 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5621 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5622 PetscFunctionReturn(0); 5623 } 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5627 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5628 { 5629 PC_IS *pcis=(PC_IS*)pc->data; 5630 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5631 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5632 Mat S_j; 5633 PetscInt *used_xadj,*used_adjncy; 5634 PetscBool free_used_adj; 5635 PetscErrorCode ierr; 5636 5637 PetscFunctionBegin; 5638 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5639 free_used_adj = PETSC_FALSE; 5640 if (pcbddc->sub_schurs_layers == -1) { 5641 used_xadj = NULL; 5642 used_adjncy = NULL; 5643 } else { 5644 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5645 used_xadj = pcbddc->mat_graph->xadj; 5646 used_adjncy = pcbddc->mat_graph->adjncy; 5647 } else if (pcbddc->computed_rowadj) { 5648 used_xadj = pcbddc->mat_graph->xadj; 5649 used_adjncy = pcbddc->mat_graph->adjncy; 5650 } else { 5651 PetscBool flg_row=PETSC_FALSE; 5652 const PetscInt *xadj,*adjncy; 5653 PetscInt nvtxs; 5654 5655 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5656 if (flg_row) { 5657 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5658 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5659 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5660 free_used_adj = PETSC_TRUE; 5661 } else { 5662 pcbddc->sub_schurs_layers = -1; 5663 used_xadj = NULL; 5664 used_adjncy = NULL; 5665 } 5666 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5667 } 5668 } 5669 5670 /* setup sub_schurs data */ 5671 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5672 if (!sub_schurs->use_mumps) { 5673 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5674 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5675 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5676 } else { 5677 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5678 PetscBool isseqaij; 5679 if (!pcbddc->use_vertices && reuse_solvers) { 5680 PetscInt n_vertices; 5681 5682 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5683 reuse_solvers = (PetscBool)!n_vertices; 5684 } 5685 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5686 if (!isseqaij) { 5687 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5688 if (matis->A == pcbddc->local_mat) { 5689 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5690 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5691 } else { 5692 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5693 } 5694 } 5695 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5696 } 5697 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5698 5699 /* free adjacency */ 5700 if (free_used_adj) { 5701 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5702 } 5703 PetscFunctionReturn(0); 5704 } 5705 5706 #undef __FUNCT__ 5707 #define __FUNCT__ "PCBDDCInitSubSchurs" 5708 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5709 { 5710 PC_IS *pcis=(PC_IS*)pc->data; 5711 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5712 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5713 PCBDDCGraph graph; 5714 PetscErrorCode ierr; 5715 5716 PetscFunctionBegin; 5717 /* attach interface graph for determining subsets */ 5718 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5719 IS verticesIS,verticescomm; 5720 PetscInt vsize,*idxs; 5721 5722 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5723 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5724 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5725 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5726 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5727 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5728 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5729 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5730 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5731 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5732 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5733 /* 5734 if (pcbddc->dbg_flag) { 5735 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5736 } 5737 */ 5738 } else { 5739 graph = pcbddc->mat_graph; 5740 } 5741 5742 /* sub_schurs init */ 5743 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5744 5745 /* free graph struct */ 5746 if (pcbddc->sub_schurs_rebuild) { 5747 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5748 } 5749 PetscFunctionReturn(0); 5750 } 5751 5752 #undef __FUNCT__ 5753 #define __FUNCT__ "PCBDDCCheckOperator" 5754 PetscErrorCode PCBDDCCheckOperator(PC pc) 5755 { 5756 PC_IS *pcis=(PC_IS*)pc->data; 5757 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5758 PetscErrorCode ierr; 5759 5760 PetscFunctionBegin; 5761 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5762 IS zerodiag = NULL; 5763 Mat S_j,B0_B=NULL; 5764 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5765 PetscScalar *p0_check,*array,*array2; 5766 PetscReal norm; 5767 PetscInt i; 5768 5769 /* B0 and B0_B */ 5770 if (zerodiag) { 5771 IS dummy; 5772 5773 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5774 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5775 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5776 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5777 } 5778 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5779 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5780 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5781 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5783 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5784 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5785 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5786 /* S_j */ 5787 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5788 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5789 5790 /* mimic vector in \widetilde{W}_\Gamma */ 5791 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5792 /* continuous in primal space */ 5793 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5794 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5795 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5796 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5797 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5798 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5799 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5800 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5801 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5802 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5803 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5804 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5805 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5806 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5807 5808 /* assemble rhs for coarse problem */ 5809 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5810 /* local with Schur */ 5811 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5812 if (zerodiag) { 5813 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5814 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5815 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5816 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5817 } 5818 /* sum on primal nodes the local contributions */ 5819 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5820 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5821 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5822 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5823 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5824 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5825 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5826 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5827 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5828 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5829 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5830 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5831 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5832 /* scale primal nodes (BDDC sums contibutions) */ 5833 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5834 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5835 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5836 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5837 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5838 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5839 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5840 /* global: \widetilde{B0}_B w_\Gamma */ 5841 if (zerodiag) { 5842 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5843 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5844 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5845 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5846 } 5847 /* BDDC */ 5848 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5849 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5850 5851 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5852 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5853 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5854 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5855 for (i=0;i<pcbddc->benign_n;i++) { 5856 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5857 } 5858 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5859 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5860 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5861 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5862 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5863 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5864 } 5865 PetscFunctionReturn(0); 5866 } 5867