1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 /* partial clean up */ 147 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 148 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 149 if (A != B) { 150 ierr = MatDestroy(&B);CHKERRQ(ierr); 151 } 152 153 /* get back data */ 154 if (ncc) *ncc = graph->ncc; 155 if (cc) { 156 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 157 for (i=0;i<graph->ncc;i++) { 158 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 159 } 160 *cc = cc_n; 161 } 162 /* clean up graph */ 163 graph->xadj = 0; 164 graph->adjncy = 0; 165 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "PCBDDCBenignCheck" 171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 172 { 173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 174 PC_IS* pcis = (PC_IS*)(pc->data); 175 IS dirIS = NULL; 176 PetscInt i; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 181 if (zerodiag) { 182 Mat A; 183 Vec vec3_N; 184 PetscScalar *vals; 185 const PetscInt *idxs; 186 PetscInt nz; 187 188 /* p0 */ 189 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 190 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 193 for (i=0;i<nz;i++) vals[i] = 1.; 194 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 195 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 196 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 197 /* v_I */ 198 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 199 for (i=0;i<nz;i++) vals[i] = 0.; 200 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 202 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 203 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 204 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 205 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 206 if (dirIS) { 207 PetscInt n; 208 209 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 210 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 211 for (i=0;i<n;i++) vals[i] = 0.; 212 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 213 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 214 } 215 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 216 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 218 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 219 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 220 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 221 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 222 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 223 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 224 } 225 ierr = PetscFree(vals);CHKERRQ(ierr); 226 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 227 } 228 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 229 230 /* check PCBDDCBenignGetOrSetP0 */ 231 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 232 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 233 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 234 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 235 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 236 for (i=0;i<pcbddc->benign_n;i++) { 237 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 238 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 239 } 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 247 { 248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 249 IS pressures,zerodiag,*zerodiag_subs; 250 PetscInt nz,n; 251 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 256 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 258 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 259 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 260 Checks if all the pressure dofs in each subdomain have a zero diagonal 261 If not, a change of basis on pressures is not needed 262 since the local Schur complements are SPD 263 */ 264 has_null_pressures = PETSC_TRUE; 265 have_null = PETSC_TRUE; 266 if (pcbddc->n_ISForDofsLocal) { 267 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 268 269 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 270 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 271 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 272 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 273 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 274 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 275 if (!sorted) { 276 ierr = ISSort(pressures);CHKERRQ(ierr); 277 } 278 } else { 279 pressures = NULL; 280 } 281 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 282 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 283 /* TODO: add check for shared dofs and raise error */ 284 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 285 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 286 if (!sorted) { 287 ierr = ISSort(zerodiag);CHKERRQ(ierr); 288 } 289 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 290 if (!nz) { 291 if (n) have_null = PETSC_FALSE; 292 has_null_pressures = PETSC_FALSE; 293 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 294 } 295 recompute_zerodiag = PETSC_FALSE; 296 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 297 zerodiag_subs = NULL; 298 pcbddc->benign_n = 0; 299 if (has_null_pressures) { 300 IS *subs; 301 PetscInt nsubs,i; 302 303 subs = pcbddc->local_subs; 304 nsubs = pcbddc->n_local_subs; 305 if (nsubs > 1) { 306 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 307 for (i=0;i<nsubs;i++) { 308 ISLocalToGlobalMapping l2g; 309 IS t_zerodiag_subs; 310 PetscInt nl; 311 312 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 313 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 314 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 315 if (nl) { 316 PetscBool valid = PETSC_TRUE; 317 318 if (pressures) { 319 IS t_pressure_subs; 320 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 321 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 322 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 323 } 324 if (valid) { 325 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 326 pcbddc->benign_n++; 327 } else { 328 recompute_zerodiag = PETSC_TRUE; 329 } 330 } 331 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 332 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 333 } 334 } else { /* there's just one subdomain (or zero if they have not been detected */ 335 PetscBool valid = PETSC_TRUE; 336 if (pressures) { 337 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 338 } 339 if (valid) { 340 pcbddc->benign_n = 1; 341 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 342 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 343 zerodiag_subs[0] = zerodiag; 344 } 345 } 346 } 347 348 if (!pcbddc->benign_n) { 349 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 350 recompute_zerodiag = PETSC_FALSE; 351 has_null_pressures = PETSC_FALSE; 352 have_null = PETSC_FALSE; 353 } 354 355 /* final check for null pressures */ 356 if (zerodiag && pressures) { 357 PetscInt nz,np; 358 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 359 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 360 if (nz != np) have_null = PETSC_FALSE; 361 } 362 363 if (recompute_zerodiag) { 364 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 365 if (pcbddc->benign_n == 1) { 366 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 367 zerodiag = zerodiag_subs[0]; 368 } else { 369 PetscInt i,nzn,*new_idxs; 370 371 nzn = 0; 372 for (i=0;i<pcbddc->benign_n;i++) { 373 PetscInt ns; 374 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 375 nzn += ns; 376 } 377 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 378 nzn = 0; 379 for (i=0;i<pcbddc->benign_n;i++) { 380 PetscInt ns,*idxs; 381 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 382 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 383 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 nzn += ns; 386 } 387 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 388 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 389 } 390 have_null = PETSC_FALSE; 391 } 392 393 if (has_null_pressures) { 394 IS zerodiagc; 395 const PetscInt *idxs,*idxsc; 396 PetscInt i,s,*nnz; 397 398 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 399 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 400 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 401 /* local change of basis for pressures */ 402 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 403 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 404 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 405 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 406 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 407 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 408 for (i=0;i<pcbddc->benign_n;i++) { 409 PetscInt nzs,j; 410 411 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 413 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 414 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 415 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 } 417 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 418 ierr = PetscFree(nnz);CHKERRQ(ierr); 419 /* set identity on velocities */ 420 for (i=0;i<n-nz;i++) { 421 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 422 } 423 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 424 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 425 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 426 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 427 /* set change on pressures */ 428 for (s=0;s<pcbddc->benign_n;s++) { 429 PetscScalar *array; 430 PetscInt nzs; 431 432 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 433 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 434 for (i=0;i<nzs-1;i++) { 435 PetscScalar vals[2]; 436 PetscInt cols[2]; 437 438 cols[0] = idxs[i]; 439 cols[1] = idxs[nzs-1]; 440 vals[0] = 1.; 441 vals[1] = 1.; 442 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 443 } 444 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 445 for (i=0;i<nzs-1;i++) array[i] = -1.; 446 array[nzs-1] = 1.; 447 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 448 /* store local idxs for p0 */ 449 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 450 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 451 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 452 ierr = PetscFree(array);CHKERRQ(ierr); 453 } 454 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 /* TODO: need optimization? */ 457 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 458 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 459 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 461 /* store global idxs for p0 */ 462 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 465 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 466 467 /* determines if the coarse solver will be singular or not */ 468 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 469 *zerodiaglocal = zerodiag; 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 476 { 477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 if (!pcbddc->benign_sf) { 482 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 483 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 484 } 485 if (get) { /* use SF to get values */ 486 PetscScalar *array; 487 488 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 489 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 490 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 493 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 503 { 504 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 /* TODO: add error checking 509 - avoid nested pop (or push) calls. 510 - cannot push before pop. 511 - cannot call this if pcbddc->local_mat is NULL 512 */ 513 if (!pcbddc->benign_n) { 514 PetscFunctionReturn(0); 515 } 516 if (pop) { 517 IS is_p0; 518 MatReuse reuse; 519 520 /* extract B_0 */ 521 reuse = MAT_INITIAL_MATRIX; 522 if (pcbddc->benign_B0) { 523 reuse = MAT_REUSE_MATRIX; 524 } 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 526 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 527 /* remove rows and cols from local problem */ 528 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 529 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 530 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 531 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 532 } else { /* push */ 533 PetscInt i; 534 535 for (i=0;i<pcbddc->benign_n;i++) { 536 PetscScalar *B0_vals; 537 PetscInt *B0_cols,B0_ncol; 538 539 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 540 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 541 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 542 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 544 } 545 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "PCBDDCAdaptiveSelection" 553 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 554 { 555 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 556 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 557 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 558 PetscBLASInt *B_iwork,*B_ifail; 559 PetscScalar *work,lwork; 560 PetscScalar *St,*S,*eigv; 561 PetscScalar *Sarray,*Starray; 562 PetscReal *eigs,thresh; 563 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 564 PetscBool allocated_S_St; 565 #if defined(PETSC_USE_COMPLEX) 566 PetscReal *rwork; 567 #endif 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 if (!sub_schurs->use_mumps) { 572 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 573 } 574 575 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 576 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 577 } 578 579 if (pcbddc->dbg_flag) { 580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 588 } 589 590 /* max size of subsets */ 591 mss = 0; 592 for (i=0;i<sub_schurs->n_subs;i++) { 593 PetscInt subset_size; 594 595 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 596 mss = PetscMax(mss,subset_size); 597 } 598 599 /* min/max and threshold */ 600 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 601 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 602 nmax = PetscMax(nmin,nmax); 603 allocated_S_St = PETSC_FALSE; 604 if (nmin) { 605 allocated_S_St = PETSC_TRUE; 606 } 607 608 /* allocate lapack workspace */ 609 cum = cum2 = 0; 610 maxneigs = 0; 611 for (i=0;i<sub_schurs->n_subs;i++) { 612 PetscInt n,subset_size; 613 614 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 615 n = PetscMin(subset_size,nmax); 616 cum += subset_size; 617 cum2 += subset_size*n; 618 maxneigs = PetscMax(maxneigs,n); 619 } 620 if (mss) { 621 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 622 PetscBLASInt B_itype = 1; 623 PetscBLASInt B_N = mss; 624 PetscReal zero = 0.0; 625 PetscReal eps = 0.0; /* dlamch? */ 626 627 B_lwork = -1; 628 S = NULL; 629 St = NULL; 630 eigs = NULL; 631 eigv = NULL; 632 B_iwork = NULL; 633 B_ifail = NULL; 634 #if defined(PETSC_USE_COMPLEX) 635 rwork = NULL; 636 #endif 637 thresh = 1.0; 638 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 639 #if defined(PETSC_USE_COMPLEX) 640 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 641 #else 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 643 #endif 644 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 646 } else { 647 /* TODO */ 648 } 649 } else { 650 lwork = 0; 651 } 652 653 nv = 0; 654 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 655 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 656 } 657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 658 if (allocated_S_St) { 659 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 660 } 661 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 662 #if defined(PETSC_USE_COMPLEX) 663 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 664 #endif 665 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 666 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 667 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 668 nv+cum,&pcbddc->adaptive_constraints_idxs, 669 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 670 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 671 672 maxneigs = 0; 673 cum = cum2 = cumarray = 0; 674 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 675 pcbddc->adaptive_constraints_data_ptr[0] = 0; 676 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 677 const PetscInt *idxs; 678 679 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 680 for (cum=0;cum<nv;cum++) { 681 pcbddc->adaptive_constraints_n[cum] = 1; 682 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 683 pcbddc->adaptive_constraints_data[cum] = 1.0; 684 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 685 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 686 } 687 cum2 = cum; 688 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 689 } 690 691 if (mss) { /* multilevel */ 692 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 693 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 694 } 695 696 thresh = pcbddc->adaptive_threshold; 697 for (i=0;i<sub_schurs->n_subs;i++) { 698 const PetscInt *idxs; 699 PetscReal infty = PETSC_MAX_REAL; 700 PetscInt j,subset_size,eigs_start = 0; 701 PetscBLASInt B_N; 702 PetscBool same_data = PETSC_FALSE; 703 704 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 705 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 706 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 707 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 708 if (sub_schurs->is_hermitian) { 709 PetscInt j,k; 710 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 711 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 712 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 713 } 714 for (j=0;j<subset_size;j++) { 715 for (k=j;k<subset_size;k++) { 716 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 717 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 718 } 719 } 720 } else { 721 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 722 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 723 } 724 } else { 725 S = Sarray + cumarray; 726 St = Starray + cumarray; 727 } 728 729 /* see if we can save some work */ 730 if (sub_schurs->n_subs == 1) { 731 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 732 } 733 734 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 735 B_neigs = 0; 736 } else { 737 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 738 PetscBLASInt B_itype = 1; 739 PetscBLASInt B_IL, B_IU; 740 PetscReal eps = -1.0; /* dlamch? */ 741 PetscInt nmin_s; 742 743 if (pcbddc->dbg_flag) { 744 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 745 } 746 747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 748 if (thresh > 1.+PETSC_SMALL) { 749 750 /* ask for eigenvalues larger than thresh */ 751 #if defined(PETSC_USE_COMPLEX) 752 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 753 #else 754 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 755 #endif 756 } else { 757 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 758 B_IL = 1; 759 #if defined(PETSC_USE_COMPLEX) 760 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 761 #else 762 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 763 #endif 764 } 765 ierr = PetscFPTrapPop();CHKERRQ(ierr); 766 if (B_ierr) { 767 if (B_ierr < 0 ) { 768 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 769 } else if (B_ierr <= B_N) { 770 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 771 } else { 772 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 773 } 774 } 775 776 if (B_neigs > nmax) { 777 if (pcbddc->dbg_flag) { 778 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 779 } 780 eigs_start = B_neigs -nmax; 781 B_neigs = nmax; 782 } 783 784 nmin_s = PetscMin(nmin,B_N); 785 if (B_neigs < nmin_s) { 786 PetscBLASInt B_neigs2; 787 788 B_IU = B_N - B_neigs; 789 B_IL = B_N - nmin_s + 1; 790 if (pcbddc->dbg_flag) { 791 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 792 } 793 if (sub_schurs->is_hermitian) { 794 PetscInt j; 795 for (j=0;j<subset_size;j++) { 796 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 797 } 798 for (j=0;j<subset_size;j++) { 799 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 800 } 801 } else { 802 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 803 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 804 } 805 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 806 #if defined(PETSC_USE_COMPLEX) 807 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 808 #else 809 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 810 #endif 811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 812 B_neigs += B_neigs2; 813 } 814 if (B_ierr) { 815 if (B_ierr < 0 ) { 816 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 817 } else if (B_ierr <= B_N) { 818 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 819 } else { 820 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 821 } 822 } 823 if (pcbddc->dbg_flag) { 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 825 for (j=0;j<B_neigs;j++) { 826 if (eigs[j] == 0.0) { 827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 828 } else { 829 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 830 } 831 } 832 } 833 } else { 834 /* TODO */ 835 } 836 } 837 maxneigs = PetscMax(B_neigs,maxneigs); 838 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 839 if (B_neigs) { 840 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 841 842 if (pcbddc->dbg_flag > 1) { 843 PetscInt ii; 844 for (ii=0;ii<B_neigs;ii++) { 845 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 846 for (j=0;j<B_N;j++) { 847 #if defined(PETSC_USE_COMPLEX) 848 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 849 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 851 #else 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 853 #endif 854 } 855 } 856 } 857 #if 0 858 for (j=0;j<B_neigs;j++) { 859 PetscBLASInt Blas_N,Blas_one = 1.0; 860 PetscScalar norm; 861 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 862 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 863 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 864 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 865 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 866 } else { 867 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 868 } 869 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 870 } 871 #endif 872 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 873 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 874 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 875 cum++; 876 } 877 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 878 /* shift for next computation */ 879 cumarray += subset_size*subset_size; 880 } 881 if (pcbddc->dbg_flag) { 882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 883 } 884 885 if (mss) { 886 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 887 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 888 /* destroy matrices (junk) */ 889 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 890 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 891 } 892 if (allocated_S_St) { 893 ierr = PetscFree2(S,St);CHKERRQ(ierr); 894 } 895 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 896 #if defined(PETSC_USE_COMPLEX) 897 ierr = PetscFree(rwork);CHKERRQ(ierr); 898 #endif 899 if (pcbddc->dbg_flag) { 900 PetscInt maxneigs_r; 901 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 903 } 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "PCBDDCSetUpSolvers" 909 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 910 { 911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 912 PetscScalar *coarse_submat_vals; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 /* Setup local scatters R_to_B and (optionally) R_to_D */ 917 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 918 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 919 920 /* Setup local neumann solver ksp_R */ 921 /* PCBDDCSetUpLocalScatters should be called first! */ 922 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 923 924 /* Change global null space passed in by the user if change of basis has been requested */ 925 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 926 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 927 } 928 929 /* 930 Setup local correction and local part of coarse basis. 931 Gives back the dense local part of the coarse matrix in column major ordering 932 */ 933 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 934 935 /* Compute total number of coarse nodes and setup coarse solver */ 936 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 937 938 /* free */ 939 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "PCBDDCResetCustomization" 945 PetscErrorCode PCBDDCResetCustomization(PC pc) 946 { 947 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 952 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 953 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 954 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 955 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 956 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 957 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 958 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 959 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 960 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 961 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 962 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "PCBDDCResetTopography" 968 PetscErrorCode PCBDDCResetTopography(PC pc) 969 { 970 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 971 PetscInt i; 972 PetscErrorCode ierr; 973 974 PetscFunctionBegin; 975 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 976 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 977 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 978 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 979 for (i=0;i<pcbddc->n_local_subs;i++) { 980 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 981 } 982 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 983 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCBDDCResetSolvers" 989 PetscErrorCode PCBDDCResetSolvers(PC pc) 990 { 991 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 992 PetscScalar *array; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 997 if (pcbddc->coarse_phi_B) { 998 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 999 ierr = PetscFree(array);CHKERRQ(ierr); 1000 } 1001 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1002 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1003 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1005 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1006 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1007 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1008 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1009 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1010 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1011 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1013 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1014 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1015 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1016 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1017 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1018 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1019 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1020 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1021 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1022 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1023 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1024 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1025 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1028 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1029 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1035 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1036 { 1037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1038 PC_IS *pcis = (PC_IS*)pc->data; 1039 VecType impVecType; 1040 PetscInt n_constraints,n_R,old_size; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 if (!pcbddc->ConstraintMatrix) { 1045 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1046 } 1047 /* get sizes */ 1048 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1049 n_R = pcis->n - pcbddc->n_vertices; 1050 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1051 /* local work vectors (try to avoid unneeded work)*/ 1052 /* R nodes */ 1053 old_size = -1; 1054 if (pcbddc->vec1_R) { 1055 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1056 } 1057 if (n_R != old_size) { 1058 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1059 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1060 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1061 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1062 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1063 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1064 } 1065 /* local primal dofs */ 1066 old_size = -1; 1067 if (pcbddc->vec1_P) { 1068 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1069 } 1070 if (pcbddc->local_primal_size != old_size) { 1071 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1072 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1073 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1074 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1075 } 1076 /* local explicit constraints */ 1077 old_size = -1; 1078 if (pcbddc->vec1_C) { 1079 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1080 } 1081 if (n_constraints && n_constraints != old_size) { 1082 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1083 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1084 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1085 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "PCBDDCSetUpCorrection" 1092 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1093 { 1094 PetscErrorCode ierr; 1095 /* pointers to pcis and pcbddc */ 1096 PC_IS* pcis = (PC_IS*)pc->data; 1097 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1098 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1099 /* submatrices of local problem */ 1100 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1101 /* submatrices of benign trick */ 1102 Mat B0_V = NULL; 1103 /* submatrices of local coarse problem */ 1104 Mat S_VV,S_CV,S_VC,S_CC; 1105 /* working matrices */ 1106 Mat C_CR; 1107 /* additional working stuff */ 1108 PC pc_R; 1109 Mat F; 1110 PetscBool isLU,isCHOL,isILU; 1111 1112 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1113 PetscScalar *work; 1114 PetscInt *idx_V_B; 1115 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1116 PetscInt i,n_R,n_D,n_B; 1117 1118 /* some shortcuts to scalars */ 1119 PetscScalar one=1.0,m_one=-1.0; 1120 1121 PetscFunctionBegin; 1122 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1123 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1124 } 1125 1126 /* Set Non-overlapping dimensions */ 1127 n_vertices = pcbddc->n_vertices; 1128 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1129 n_B = pcis->n_B; 1130 n_D = pcis->n - n_B; 1131 n_R = pcis->n - n_vertices; 1132 1133 /* vertices in boundary numbering */ 1134 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1135 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1136 if (i != n_vertices) { 1137 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1138 } 1139 1140 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1141 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1143 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1145 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1147 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1149 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1150 1151 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1153 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1154 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1155 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1156 lda_rhs = n_R; 1157 if (isLU || isILU || isCHOL) { 1158 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1159 } else if (sub_schurs->reuse_mumps) { 1160 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1161 MatFactorType type; 1162 1163 F = reuse_mumps->F; 1164 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1165 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1166 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1167 } else { 1168 F = NULL; 1169 } 1170 1171 /* allocate workspace */ 1172 n = 0; 1173 if (n_constraints) { 1174 n += lda_rhs*n_constraints; 1175 } 1176 if (n_vertices) { 1177 n = PetscMax(2*lda_rhs*n_vertices,n); 1178 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1179 } 1180 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1181 1182 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1183 if (n_constraints) { 1184 Mat M1,M2,M3; 1185 Mat auxmat; 1186 IS is_aux; 1187 PetscScalar *array,*array2; 1188 1189 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1190 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1191 1192 /* Extract constraints on R nodes: C_{CR} */ 1193 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1194 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1195 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1196 1197 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1198 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1199 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1200 for (i=0;i<n_constraints;i++) { 1201 const PetscScalar *row_cmat_values; 1202 const PetscInt *row_cmat_indices; 1203 PetscInt size_of_constraint,j; 1204 1205 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1206 for (j=0;j<size_of_constraint;j++) { 1207 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1208 } 1209 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1210 } 1211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1212 if (F) { 1213 Mat B; 1214 1215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1216 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1217 ierr = MatDestroy(&B);CHKERRQ(ierr); 1218 } else { 1219 PetscScalar *marr; 1220 1221 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1222 for (i=0;i<n_constraints;i++) { 1223 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1224 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1227 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1228 } 1229 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1230 } 1231 if (!pcbddc->switch_static) { 1232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1233 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1234 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1235 for (i=0;i<n_constraints;i++) { 1236 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1237 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1238 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1241 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1242 } 1243 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1244 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1245 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1246 } else { 1247 if (lda_rhs != n_R) { 1248 IS dummy; 1249 1250 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1251 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1252 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1253 } else { 1254 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1255 pcbddc->local_auxmat2 = local_auxmat2_R; 1256 } 1257 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1258 } 1259 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1260 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1261 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1262 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1263 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1264 if (isCHOL) { 1265 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1266 } else { 1267 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1268 } 1269 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1270 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1274 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1275 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1276 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1277 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1278 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1279 } 1280 /* Get submatrices from subdomain matrix */ 1281 if (pcbddc->benign_n) { 1282 IS dummy; 1283 Mat B0_R; 1284 PetscReal norm; 1285 1286 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1287 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1288 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1289 if (norm > PETSC_SMALL) { 1290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1291 } 1292 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1293 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1294 } 1295 1296 if (n_vertices) { 1297 IS is_aux; 1298 1299 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1300 IS tis; 1301 1302 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1303 ierr = ISSort(tis);CHKERRQ(ierr); 1304 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1305 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1306 } else { 1307 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1308 } 1309 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1312 if (pcbddc->benign_n) { 1313 IS dummy; 1314 1315 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1316 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1317 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1318 } 1319 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1320 } 1321 1322 /* Matrix of coarse basis functions (local) */ 1323 if (pcbddc->coarse_phi_B) { 1324 PetscInt on_B,on_primal,on_D=n_D; 1325 if (pcbddc->coarse_phi_D) { 1326 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1329 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1330 PetscScalar *marray; 1331 1332 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1333 ierr = PetscFree(marray);CHKERRQ(ierr); 1334 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1335 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1336 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1338 } 1339 } 1340 1341 if (!pcbddc->coarse_phi_B) { 1342 PetscScalar *marray; 1343 1344 n = n_B*pcbddc->local_primal_size; 1345 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1346 n += n_D*pcbddc->local_primal_size; 1347 } 1348 if (!pcbddc->symmetric_primal) { 1349 n *= 2; 1350 } 1351 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1352 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1353 n = n_B*pcbddc->local_primal_size; 1354 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1356 n += n_D*pcbddc->local_primal_size; 1357 } 1358 if (!pcbddc->symmetric_primal) { 1359 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1361 n = n_B*pcbddc->local_primal_size; 1362 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1363 } 1364 } else { 1365 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1366 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1369 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1370 } 1371 } 1372 } 1373 1374 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1375 p0_lidx_I = NULL; 1376 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1377 const PetscInt *idxs; 1378 1379 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1380 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1381 for (i=0;i<pcbddc->benign_n;i++) { 1382 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1383 } 1384 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1385 } 1386 1387 /* vertices */ 1388 if (n_vertices) { 1389 1390 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1391 1392 if (n_R) { 1393 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1394 PetscBLASInt B_N,B_one = 1; 1395 PetscScalar *x,*y; 1396 PetscBool isseqaij; 1397 1398 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1399 if (lda_rhs == n_R) { 1400 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1401 } else { 1402 Mat A_RVt; 1403 PetscScalar *array; 1404 1405 /* TODO optimize */ 1406 ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1408 ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1409 ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr); 1410 for (i=0;i<n_vertices;i++) { 1411 ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr); 1412 } 1413 ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr); 1414 ierr = MatDestroy(&A_RVt);CHKERRQ(ierr); 1415 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr); 1416 } 1417 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1418 if (F) { /* TODO could be optimized for symmetric problems */ 1419 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1420 } else { 1421 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1422 for (i=0;i<n_vertices;i++) { 1423 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1424 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1425 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1427 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1428 } 1429 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1430 } 1431 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1432 /* S_VV and S_CV */ 1433 if (n_constraints) { 1434 Mat B; 1435 1436 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1437 for (i=0;i<n_vertices;i++) { 1438 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1439 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1440 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1443 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1444 } 1445 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1446 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1447 ierr = MatDestroy(&B);CHKERRQ(ierr); 1448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1449 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1450 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1452 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1453 ierr = MatDestroy(&B);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1456 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1458 } 1459 if (lda_rhs != n_R) { 1460 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1461 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1462 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1463 } 1464 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1465 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1466 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1468 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1469 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1470 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1471 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1472 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1473 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1474 } else { 1475 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1476 } 1477 for (i=0;i<pcbddc->benign_n;i++) { 1478 const PetscScalar *vals; 1479 const PetscInt *idxs; 1480 PetscInt n,j,primal_idx; 1481 1482 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1483 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1484 for (j=0;j<n;j++) { 1485 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1486 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1487 } 1488 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1489 } 1490 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1491 1492 /* coarse basis functions */ 1493 for (i=0;i<n_vertices;i++) { 1494 PetscScalar *y; 1495 1496 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1497 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1498 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 y[n_B*i+idx_V_B[i]] = 1.0; 1502 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1503 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1504 1505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1506 PetscInt j; 1507 1508 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1509 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1510 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1513 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1514 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1515 } 1516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1517 } 1518 /* if n_R == 0 the object is not destroyed */ 1519 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1520 } 1521 1522 if (n_constraints) { 1523 Mat B; 1524 1525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1526 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1527 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1528 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1529 if (n_vertices) { 1530 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1531 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1532 } else { 1533 Mat S_VCt; 1534 1535 if (lda_rhs != n_R) { 1536 ierr = MatDestroy(&B);CHKERRQ(ierr); 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1538 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1539 } 1540 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1541 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1542 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1543 } 1544 } 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 /* coarse basis functions */ 1547 for (i=0;i<n_constraints;i++) { 1548 PetscScalar *y; 1549 1550 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1551 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1552 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1553 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1556 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1558 PetscInt j; 1559 1560 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1561 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1562 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1565 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1566 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1567 } 1568 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1569 } 1570 } 1571 if (n_constraints) { 1572 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1573 } 1574 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1575 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1576 1577 /* compute other basis functions for non-symmetric problems */ 1578 if (!pcbddc->symmetric_primal) { 1579 Mat B_V=NULL,B_C=NULL; 1580 PetscScalar *marray; 1581 1582 if (n_constraints) { 1583 Mat S_CCT,C_CRT; 1584 1585 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1586 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1587 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1588 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1589 if (n_vertices) { 1590 Mat S_VCT; 1591 1592 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1593 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1594 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1595 } 1596 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1597 } 1598 if (n_vertices && n_R) { 1599 PetscScalar *av,*marray; 1600 const PetscInt *xadj,*adjncy; 1601 PetscInt n; 1602 PetscBool flg_row; 1603 1604 /* B_V = B_V - A_VR^T */ 1605 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1606 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1607 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1608 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1609 for (i=0;i<n;i++) { 1610 PetscInt j; 1611 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1612 } 1613 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1615 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1616 } 1617 1618 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1619 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1620 for (i=0;i<n_vertices;i++) { 1621 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1622 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1623 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1626 } 1627 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1628 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1629 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1630 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1631 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1632 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1633 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1634 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1635 } 1636 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1637 /* coarse basis functions */ 1638 for (i=0;i<pcbddc->local_primal_size;i++) { 1639 PetscScalar *y; 1640 1641 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1642 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1643 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1644 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 if (i<n_vertices) { 1647 y[n_B*i+idx_V_B[i]] = 1.0; 1648 } 1649 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1651 1652 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1653 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1658 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1659 } 1660 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1661 } 1662 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1663 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1664 } 1665 /* free memory */ 1666 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1667 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1668 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1669 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1670 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1671 ierr = PetscFree(work);CHKERRQ(ierr); 1672 if (n_vertices) { 1673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1674 } 1675 if (n_constraints) { 1676 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1677 } 1678 /* Checking coarse_sub_mat and coarse basis functios */ 1679 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1680 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1681 if (pcbddc->dbg_flag) { 1682 Mat coarse_sub_mat; 1683 Mat AUXMAT,TM1,TM2,TM3,TM4; 1684 Mat coarse_phi_D,coarse_phi_B; 1685 Mat coarse_psi_D,coarse_psi_B; 1686 Mat A_II,A_BB,A_IB,A_BI; 1687 Mat C_B,CPHI; 1688 IS is_dummy; 1689 Vec mones; 1690 MatType checkmattype=MATSEQAIJ; 1691 PetscReal real_value; 1692 1693 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1694 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1695 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1696 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1697 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1698 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1699 if (!pcbddc->symmetric_primal) { 1700 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1701 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1702 } 1703 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1704 1705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 if (!pcbddc->symmetric_primal) { 1709 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1710 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1712 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1713 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1714 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1715 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1716 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1717 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1718 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1719 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1720 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1721 } else { 1722 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1723 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1724 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1725 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1726 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1727 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1728 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1729 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1730 } 1731 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1732 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1733 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1734 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1735 if (pcbddc->benign_n) { 1736 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1737 PetscScalar *data,*data2; 1738 PetscInt j; 1739 1740 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1741 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1742 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1743 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1744 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1745 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1746 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1747 for (j=0;j<pcbddc->benign_n;j++) { 1748 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1749 for (i=0;i<pcbddc->local_primal_size;i++) { 1750 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1751 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1752 } 1753 } 1754 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1755 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1756 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1758 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1759 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1760 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1761 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1762 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1763 } 1764 #if 0 1765 { 1766 PetscViewer viewer; 1767 char filename[256]; 1768 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 1769 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1770 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1771 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 1772 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 1773 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 1774 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1775 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 1776 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 1777 if (pcbddc->coarse_phi_D) { 1778 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 1779 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 1780 } 1781 if (pcbddc->coarse_psi_B) { 1782 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 1783 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 1784 } 1785 if (pcbddc->coarse_phi_B) { 1786 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 1787 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 1788 } 1789 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1790 } 1791 #endif 1792 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1793 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1796 1797 /* check constraints */ 1798 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1799 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1800 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1801 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1802 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1803 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1804 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1805 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1807 if (!pcbddc->symmetric_primal) { 1808 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1809 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1810 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1811 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1813 } 1814 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1815 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1816 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1817 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1818 } 1819 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1820 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1821 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1822 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1823 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1824 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1825 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1826 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1827 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1828 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1829 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1830 if (!pcbddc->symmetric_primal) { 1831 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1832 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1833 } 1834 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1835 } 1836 /* get back data */ 1837 *coarse_submat_vals_n = coarse_submat_vals; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1843 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1844 { 1845 Mat *work_mat; 1846 IS isrow_s,iscol_s; 1847 PetscBool rsorted,csorted; 1848 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1849 PetscErrorCode ierr; 1850 1851 PetscFunctionBegin; 1852 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1853 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1854 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1855 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1856 1857 if (!rsorted) { 1858 const PetscInt *idxs; 1859 PetscInt *idxs_sorted,i; 1860 1861 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1862 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1863 for (i=0;i<rsize;i++) { 1864 idxs_perm_r[i] = i; 1865 } 1866 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1867 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1868 for (i=0;i<rsize;i++) { 1869 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1870 } 1871 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1872 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1873 } else { 1874 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1875 isrow_s = isrow; 1876 } 1877 1878 if (!csorted) { 1879 if (isrow == iscol) { 1880 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1881 iscol_s = isrow_s; 1882 } else { 1883 const PetscInt *idxs; 1884 PetscInt *idxs_sorted,i; 1885 1886 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1888 for (i=0;i<csize;i++) { 1889 idxs_perm_c[i] = i; 1890 } 1891 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1892 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1893 for (i=0;i<csize;i++) { 1894 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1895 } 1896 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1897 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1898 } 1899 } else { 1900 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1901 iscol_s = iscol; 1902 } 1903 1904 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1905 1906 if (!rsorted || !csorted) { 1907 Mat new_mat; 1908 IS is_perm_r,is_perm_c; 1909 1910 if (!rsorted) { 1911 PetscInt *idxs_r,i; 1912 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1913 for (i=0;i<rsize;i++) { 1914 idxs_r[idxs_perm_r[i]] = i; 1915 } 1916 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1917 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1918 } else { 1919 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1920 } 1921 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1922 1923 if (!csorted) { 1924 if (isrow_s == iscol_s) { 1925 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1926 is_perm_c = is_perm_r; 1927 } else { 1928 PetscInt *idxs_c,i; 1929 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1930 for (i=0;i<csize;i++) { 1931 idxs_c[idxs_perm_c[i]] = i; 1932 } 1933 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1934 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1935 } 1936 } else { 1937 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1938 } 1939 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1940 1941 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1942 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1943 work_mat[0] = new_mat; 1944 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1945 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1946 } 1947 1948 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1949 *B = work_mat[0]; 1950 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1951 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1952 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1958 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1959 { 1960 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1961 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1962 Mat new_mat; 1963 IS is_local,is_global; 1964 PetscInt local_size; 1965 PetscBool isseqaij; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1970 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1971 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1972 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1973 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1974 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1975 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1976 1977 /* check */ 1978 if (pcbddc->dbg_flag) { 1979 Vec x,x_change; 1980 PetscReal error; 1981 1982 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1983 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1984 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1988 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1989 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1990 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1991 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1994 ierr = VecDestroy(&x);CHKERRQ(ierr); 1995 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1996 } 1997 1998 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1999 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2000 if (isseqaij) { 2001 Mat M; 2002 2003 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2004 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2005 ierr = MatDestroy(&M);CHKERRQ(ierr); 2006 } else { 2007 Mat work_mat,M; 2008 2009 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2010 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2011 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2012 ierr = MatDestroy(&M);CHKERRQ(ierr); 2013 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2014 } 2015 if (matis->A->symmetric_set) { 2016 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2017 #if !defined(PETSC_USE_COMPLEX) 2018 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2019 #endif 2020 } 2021 /* 2022 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2023 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2024 */ 2025 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2031 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2032 { 2033 PC_IS* pcis = (PC_IS*)(pc->data); 2034 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2035 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2036 PetscInt *idx_R_local=NULL; 2037 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2038 PetscInt vbs,bs; 2039 PetscBT bitmask=NULL; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 /* 2044 No need to setup local scatters if 2045 - primal space is unchanged 2046 AND 2047 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2048 AND 2049 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2050 */ 2051 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2052 PetscFunctionReturn(0); 2053 } 2054 /* destroy old objects */ 2055 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2056 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2057 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2058 /* Set Non-overlapping dimensions */ 2059 n_B = pcis->n_B; 2060 n_D = pcis->n - n_B; 2061 n_vertices = pcbddc->n_vertices; 2062 2063 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2064 2065 /* create auxiliary bitmask and allocate workspace */ 2066 if (!sub_schurs->reuse_mumps) { 2067 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2068 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2071 } 2072 2073 for (i=0, n_R=0; i<pcis->n; i++) { 2074 if (!PetscBTLookup(bitmask,i)) { 2075 idx_R_local[n_R++] = i; 2076 } 2077 } 2078 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2079 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2080 2081 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2082 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2083 } 2084 2085 /* Block code */ 2086 vbs = 1; 2087 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2088 if (bs>1 && !(n_vertices%bs)) { 2089 PetscBool is_blocked = PETSC_TRUE; 2090 PetscInt *vary; 2091 if (!sub_schurs->reuse_mumps) { 2092 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2093 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2094 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2095 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2096 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2097 for (i=0; i<pcis->n/bs; i++) { 2098 if (vary[i]!=0 && vary[i]!=bs) { 2099 is_blocked = PETSC_FALSE; 2100 break; 2101 } 2102 } 2103 ierr = PetscFree(vary);CHKERRQ(ierr); 2104 } else { 2105 /* Verify directly the R set */ 2106 for (i=0; i<n_R/bs; i++) { 2107 PetscInt j,node=idx_R_local[bs*i]; 2108 for (j=1; j<bs; j++) { 2109 if (node != idx_R_local[bs*i+j]-j) { 2110 is_blocked = PETSC_FALSE; 2111 break; 2112 } 2113 } 2114 } 2115 } 2116 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2117 vbs = bs; 2118 for (i=0;i<n_R/vbs;i++) { 2119 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2120 } 2121 } 2122 } 2123 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2124 if (sub_schurs->reuse_mumps) { 2125 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2126 2127 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2128 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2129 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2130 reuse_mumps->is_R = pcbddc->is_R_local; 2131 } else { 2132 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2133 } 2134 2135 /* print some info if requested */ 2136 if (pcbddc->dbg_flag) { 2137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2138 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2139 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2141 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2144 } 2145 2146 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2147 if (!sub_schurs->reuse_mumps) { 2148 IS is_aux1,is_aux2; 2149 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2150 2151 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2152 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2154 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2155 for (i=0; i<n_D; i++) { 2156 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2157 } 2158 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2159 for (i=0, j=0; i<n_R; i++) { 2160 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2161 aux_array1[j++] = i; 2162 } 2163 } 2164 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2165 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 for (i=0, j=0; i<n_B; i++) { 2167 if (!PetscBTLookup(bitmask,is_indices[i])) { 2168 aux_array2[j++] = i; 2169 } 2170 } 2171 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2172 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2173 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2174 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2175 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2176 2177 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2178 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2179 for (i=0, j=0; i<n_R; i++) { 2180 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2181 aux_array1[j++] = i; 2182 } 2183 } 2184 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2185 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2186 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2187 } 2188 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2189 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2190 } else { 2191 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2192 IS tis; 2193 PetscInt schur_size; 2194 2195 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2196 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2197 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2198 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2200 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2201 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2202 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2203 } 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2211 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2212 { 2213 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2214 PC_IS *pcis = (PC_IS*)pc->data; 2215 PC pc_temp; 2216 Mat A_RR; 2217 MatReuse reuse; 2218 PetscScalar m_one = -1.0; 2219 PetscReal value; 2220 PetscInt n_D,n_R; 2221 PetscBool use_exact,use_exact_reduced,issbaij; 2222 PetscErrorCode ierr; 2223 /* prefixes stuff */ 2224 char dir_prefix[256],neu_prefix[256],str_level[16]; 2225 size_t len; 2226 2227 PetscFunctionBegin; 2228 2229 /* compute prefixes */ 2230 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2231 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2232 if (!pcbddc->current_level) { 2233 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2234 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2235 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2236 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2237 } else { 2238 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2239 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2240 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2241 len -= 15; /* remove "pc_bddc_coarse_" */ 2242 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2243 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2244 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2245 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2246 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2247 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2248 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2249 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2250 } 2251 2252 /* DIRICHLET PROBLEM */ 2253 if (dirichlet) { 2254 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2255 if (pcbddc->local_mat->symmetric_set) { 2256 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2257 } 2258 /* Matrix for Dirichlet problem is pcis->A_II */ 2259 n_D = pcis->n - pcis->n_B; 2260 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2261 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2262 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2263 /* default */ 2264 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2265 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2266 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2267 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2268 if (issbaij) { 2269 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2270 } else { 2271 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2272 } 2273 /* Allow user's customization */ 2274 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2275 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2276 } 2277 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2278 if (sub_schurs->reuse_mumps) { 2279 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2280 2281 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2282 } 2283 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2284 if (!n_D) { 2285 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2286 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2287 } 2288 /* Set Up KSP for Dirichlet problem of BDDC */ 2289 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2290 /* set ksp_D into pcis data */ 2291 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2292 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2293 pcis->ksp_D = pcbddc->ksp_D; 2294 } 2295 2296 /* NEUMANN PROBLEM */ 2297 A_RR = 0; 2298 if (neumann) { 2299 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2300 PetscInt ibs,mbs; 2301 PetscBool issbaij; 2302 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2303 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2304 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2305 if (pcbddc->ksp_R) { /* already created ksp */ 2306 PetscInt nn_R; 2307 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2308 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2309 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2310 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2311 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2312 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2313 reuse = MAT_INITIAL_MATRIX; 2314 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2315 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2316 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2317 reuse = MAT_INITIAL_MATRIX; 2318 } else { /* safe to reuse the matrix */ 2319 reuse = MAT_REUSE_MATRIX; 2320 } 2321 } 2322 /* last check */ 2323 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2324 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2325 reuse = MAT_INITIAL_MATRIX; 2326 } 2327 } else { /* first time, so we need to create the matrix */ 2328 reuse = MAT_INITIAL_MATRIX; 2329 } 2330 /* extract A_RR */ 2331 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2332 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2333 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2334 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2335 if (matis->A == pcbddc->local_mat) { 2336 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2337 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2338 } else { 2339 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2340 } 2341 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2342 if (matis->A == pcbddc->local_mat) { 2343 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2344 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2345 } else { 2346 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2347 } 2348 } 2349 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2350 if (pcbddc->local_mat->symmetric_set) { 2351 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2352 } 2353 if (!pcbddc->ksp_R) { /* create object if not present */ 2354 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2355 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2356 /* default */ 2357 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2358 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2359 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2360 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2361 if (issbaij) { 2362 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2363 } else { 2364 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2365 } 2366 /* Allow user's customization */ 2367 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2368 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2369 } 2370 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2371 if (!n_R) { 2372 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2373 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2374 } 2375 /* Reuse MUMPS solver if it is present */ 2376 if (sub_schurs->reuse_mumps) { 2377 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2378 2379 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2380 } 2381 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2382 /* Set Up KSP for Neumann problem of BDDC */ 2383 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2384 } 2385 /* free Neumann problem's matrix */ 2386 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2387 2388 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2389 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2390 if (pcbddc->dbg_flag) { 2391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2392 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2394 } 2395 if (dirichlet) { /* Dirichlet */ 2396 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2397 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2398 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2399 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2400 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2401 /* need to be adapted? */ 2402 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2403 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2404 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2405 /* print info */ 2406 if (pcbddc->dbg_flag) { 2407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2409 } 2410 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2411 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2412 } 2413 } 2414 if (neumann) { /* Neumann */ 2415 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2416 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2417 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2418 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2419 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2420 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2421 /* need to be adapted? */ 2422 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2423 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2424 /* print info */ 2425 if (pcbddc->dbg_flag) { 2426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2428 } 2429 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2430 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2431 } 2432 } 2433 } 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2439 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2440 { 2441 PetscErrorCode ierr; 2442 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2443 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2444 2445 PetscFunctionBegin; 2446 if (!sub_schurs->reuse_mumps) { 2447 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2448 } 2449 if (!pcbddc->switch_static) { 2450 if (applytranspose && pcbddc->local_auxmat1) { 2451 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2452 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2453 } 2454 if (!sub_schurs->reuse_mumps) { 2455 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2456 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2457 } else { 2458 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2459 2460 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 } 2463 } else { 2464 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2468 if (applytranspose && pcbddc->local_auxmat1) { 2469 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2470 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2471 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2472 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2473 } 2474 } 2475 if (!sub_schurs->reuse_mumps) { 2476 if (applytranspose) { 2477 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2478 } else { 2479 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2480 } 2481 } else { 2482 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2483 2484 if (applytranspose) { 2485 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2486 } else { 2487 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2488 } 2489 } 2490 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2491 if (!pcbddc->switch_static) { 2492 if (!sub_schurs->reuse_mumps) { 2493 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2494 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2495 } else { 2496 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2497 2498 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2500 } 2501 if (!applytranspose && pcbddc->local_auxmat1) { 2502 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2503 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2504 } 2505 } else { 2506 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 if (!applytranspose && pcbddc->local_auxmat1) { 2511 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2512 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2513 } 2514 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2515 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2525 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2526 { 2527 PetscErrorCode ierr; 2528 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2529 PC_IS* pcis = (PC_IS*) (pc->data); 2530 const PetscScalar zero = 0.0; 2531 2532 PetscFunctionBegin; 2533 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2534 if (applytranspose) { 2535 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2536 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2537 } else { 2538 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2539 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2540 } 2541 2542 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2543 if (pcbddc->benign_n) { 2544 PetscScalar *array; 2545 PetscInt j; 2546 2547 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2548 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2549 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2550 } 2551 2552 /* start communications from local primal nodes to rhs of coarse solver */ 2553 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2554 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2555 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2556 2557 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2558 /* TODO remove null space when doing multilevel */ 2559 if (pcbddc->coarse_ksp) { 2560 Mat coarse_mat; 2561 Vec rhs,sol; 2562 MatNullSpace nullsp; 2563 2564 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2565 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2566 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2567 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2568 if (nullsp) { 2569 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2570 } 2571 if (applytranspose) { 2572 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2573 } else { 2574 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2575 } 2576 if (nullsp) { 2577 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2578 } 2579 } 2580 2581 /* Local solution on R nodes */ 2582 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2583 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2584 } 2585 2586 /* communications from coarse sol to local primal nodes */ 2587 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2588 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2589 2590 /* Sum contributions from two levels */ 2591 if (applytranspose) { 2592 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2593 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2594 } else { 2595 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2596 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2597 } 2598 /* store p0 */ 2599 if (pcbddc->benign_n) { 2600 PetscScalar *array; 2601 PetscInt j; 2602 2603 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2604 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2605 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2606 } 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2612 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2613 { 2614 PetscErrorCode ierr; 2615 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2616 PetscScalar *array; 2617 Vec from,to; 2618 2619 PetscFunctionBegin; 2620 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2621 from = pcbddc->coarse_vec; 2622 to = pcbddc->vec1_P; 2623 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2624 Vec tvec; 2625 2626 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2627 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2628 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2629 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2630 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2631 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2632 } 2633 } else { /* from local to global -> put data in coarse right hand side */ 2634 from = pcbddc->vec1_P; 2635 to = pcbddc->coarse_vec; 2636 } 2637 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2638 PetscFunctionReturn(0); 2639 } 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2643 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2644 { 2645 PetscErrorCode ierr; 2646 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2647 PetscScalar *array; 2648 Vec from,to; 2649 2650 PetscFunctionBegin; 2651 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2652 from = pcbddc->coarse_vec; 2653 to = pcbddc->vec1_P; 2654 } else { /* from local to global -> put data in coarse right hand side */ 2655 from = pcbddc->vec1_P; 2656 to = pcbddc->coarse_vec; 2657 } 2658 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2659 if (smode == SCATTER_FORWARD) { 2660 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2661 Vec tvec; 2662 2663 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2664 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2665 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2666 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2667 } 2668 } else { 2669 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2670 ierr = VecResetArray(from);CHKERRQ(ierr); 2671 } 2672 } 2673 PetscFunctionReturn(0); 2674 } 2675 2676 /* uncomment for testing purposes */ 2677 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2680 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2681 { 2682 PetscErrorCode ierr; 2683 PC_IS* pcis = (PC_IS*)(pc->data); 2684 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2685 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2686 /* one and zero */ 2687 PetscScalar one=1.0,zero=0.0; 2688 /* space to store constraints and their local indices */ 2689 PetscScalar *constraints_data; 2690 PetscInt *constraints_idxs,*constraints_idxs_B; 2691 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2692 PetscInt *constraints_n; 2693 /* iterators */ 2694 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2695 /* BLAS integers */ 2696 PetscBLASInt lwork,lierr; 2697 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2698 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2699 /* reuse */ 2700 PetscInt olocal_primal_size,olocal_primal_size_cc; 2701 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2702 /* change of basis */ 2703 PetscBool qr_needed; 2704 PetscBT change_basis,qr_needed_idx; 2705 /* auxiliary stuff */ 2706 PetscInt *nnz,*is_indices; 2707 PetscInt ncc; 2708 /* some quantities */ 2709 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2710 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2711 2712 PetscFunctionBegin; 2713 /* Destroy Mat objects computed previously */ 2714 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2716 /* save info on constraints from previous setup (if any) */ 2717 olocal_primal_size = pcbddc->local_primal_size; 2718 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2719 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2720 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2721 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2722 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2723 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2724 2725 /* print some info */ 2726 if (pcbddc->dbg_flag) { 2727 IS vertices; 2728 PetscInt nv,nedges,nfaces; 2729 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2730 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2731 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2732 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2738 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2739 } 2740 2741 if (!pcbddc->adaptive_selection) { 2742 IS ISForVertices,*ISForFaces,*ISForEdges; 2743 MatNullSpace nearnullsp; 2744 const Vec *nearnullvecs; 2745 Vec *localnearnullsp; 2746 PetscScalar *array; 2747 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2748 PetscBool nnsp_has_cnst; 2749 /* LAPACK working arrays for SVD or POD */ 2750 PetscBool skip_lapack,boolforchange; 2751 PetscScalar *work; 2752 PetscReal *singular_vals; 2753 #if defined(PETSC_USE_COMPLEX) 2754 PetscReal *rwork; 2755 #endif 2756 #if defined(PETSC_MISSING_LAPACK_GESVD) 2757 PetscScalar *temp_basis,*correlation_mat; 2758 #else 2759 PetscBLASInt dummy_int=1; 2760 PetscScalar dummy_scalar=1.; 2761 #endif 2762 2763 /* Get index sets for faces, edges and vertices from graph */ 2764 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2765 /* free unneeded index sets */ 2766 if (!pcbddc->use_vertices) { 2767 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2768 } 2769 if (!pcbddc->use_edges) { 2770 for (i=0;i<n_ISForEdges;i++) { 2771 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2772 } 2773 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2774 n_ISForEdges = 0; 2775 } 2776 if (!pcbddc->use_faces) { 2777 for (i=0;i<n_ISForFaces;i++) { 2778 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2779 } 2780 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2781 n_ISForFaces = 0; 2782 } 2783 2784 #if defined(PETSC_USE_DEBUG) 2785 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2786 Also use_change_of_basis should be consistent among processors */ 2787 if (pcbddc->NullSpace) { 2788 PetscBool tbool[2],gbool[2]; 2789 2790 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2791 pcbddc->use_change_of_basis = PETSC_TRUE; 2792 if (!ISForEdges) { 2793 pcbddc->use_change_on_faces = PETSC_TRUE; 2794 } 2795 } 2796 tbool[0] = pcbddc->use_change_of_basis; 2797 tbool[1] = pcbddc->use_change_on_faces; 2798 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2799 pcbddc->use_change_of_basis = gbool[0]; 2800 pcbddc->use_change_on_faces = gbool[1]; 2801 } 2802 #endif 2803 2804 /* check if near null space is attached to global mat */ 2805 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2806 if (nearnullsp) { 2807 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2808 /* remove any stored info */ 2809 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2810 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2811 /* store information for BDDC solver reuse */ 2812 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2813 pcbddc->onearnullspace = nearnullsp; 2814 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2815 for (i=0;i<nnsp_size;i++) { 2816 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2817 } 2818 } else { /* if near null space is not provided BDDC uses constants by default */ 2819 nnsp_size = 0; 2820 nnsp_has_cnst = PETSC_TRUE; 2821 } 2822 /* get max number of constraints on a single cc */ 2823 max_constraints = nnsp_size; 2824 if (nnsp_has_cnst) max_constraints++; 2825 2826 /* 2827 Evaluate maximum storage size needed by the procedure 2828 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2829 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2830 There can be multiple constraints per connected component 2831 */ 2832 n_vertices = 0; 2833 if (ISForVertices) { 2834 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2835 } 2836 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2837 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2838 2839 total_counts = n_ISForFaces+n_ISForEdges; 2840 total_counts *= max_constraints; 2841 total_counts += n_vertices; 2842 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2843 2844 total_counts = 0; 2845 max_size_of_constraint = 0; 2846 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2847 IS used_is; 2848 if (i<n_ISForEdges) { 2849 used_is = ISForEdges[i]; 2850 } else { 2851 used_is = ISForFaces[i-n_ISForEdges]; 2852 } 2853 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2854 total_counts += j; 2855 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2856 } 2857 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2858 2859 /* get local part of global near null space vectors */ 2860 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2861 for (k=0;k<nnsp_size;k++) { 2862 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2865 } 2866 2867 /* whether or not to skip lapack calls */ 2868 skip_lapack = PETSC_TRUE; 2869 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2870 2871 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2872 if (!skip_lapack) { 2873 PetscScalar temp_work; 2874 2875 #if defined(PETSC_MISSING_LAPACK_GESVD) 2876 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2877 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2878 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2879 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2880 #if defined(PETSC_USE_COMPLEX) 2881 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2882 #endif 2883 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2884 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2885 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2886 lwork = -1; 2887 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2888 #if !defined(PETSC_USE_COMPLEX) 2889 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2890 #else 2891 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2892 #endif 2893 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2894 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2895 #else /* on missing GESVD */ 2896 /* SVD */ 2897 PetscInt max_n,min_n; 2898 max_n = max_size_of_constraint; 2899 min_n = max_constraints; 2900 if (max_size_of_constraint < max_constraints) { 2901 min_n = max_size_of_constraint; 2902 max_n = max_constraints; 2903 } 2904 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2905 #if defined(PETSC_USE_COMPLEX) 2906 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2907 #endif 2908 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2909 lwork = -1; 2910 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2913 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2914 #if !defined(PETSC_USE_COMPLEX) 2915 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2916 #else 2917 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2918 #endif 2919 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2920 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2921 #endif /* on missing GESVD */ 2922 /* Allocate optimal workspace */ 2923 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2924 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2925 } 2926 /* Now we can loop on constraining sets */ 2927 total_counts = 0; 2928 constraints_idxs_ptr[0] = 0; 2929 constraints_data_ptr[0] = 0; 2930 /* vertices */ 2931 if (n_vertices) { 2932 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2933 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2934 for (i=0;i<n_vertices;i++) { 2935 constraints_n[total_counts] = 1; 2936 constraints_data[total_counts] = 1.0; 2937 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2938 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2939 total_counts++; 2940 } 2941 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2942 n_vertices = total_counts; 2943 } 2944 2945 /* edges and faces */ 2946 total_counts_cc = total_counts; 2947 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2948 IS used_is; 2949 PetscBool idxs_copied = PETSC_FALSE; 2950 2951 if (ncc<n_ISForEdges) { 2952 used_is = ISForEdges[ncc]; 2953 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2954 } else { 2955 used_is = ISForFaces[ncc-n_ISForEdges]; 2956 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2957 } 2958 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2959 2960 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2961 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2962 /* change of basis should not be performed on local periodic nodes */ 2963 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2964 if (nnsp_has_cnst) { 2965 PetscScalar quad_value; 2966 2967 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2968 idxs_copied = PETSC_TRUE; 2969 2970 if (!pcbddc->use_nnsp_true) { 2971 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2972 } else { 2973 quad_value = 1.0; 2974 } 2975 for (j=0;j<size_of_constraint;j++) { 2976 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2977 } 2978 temp_constraints++; 2979 total_counts++; 2980 } 2981 for (k=0;k<nnsp_size;k++) { 2982 PetscReal real_value; 2983 PetscScalar *ptr_to_data; 2984 2985 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2986 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2987 for (j=0;j<size_of_constraint;j++) { 2988 ptr_to_data[j] = array[is_indices[j]]; 2989 } 2990 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2991 /* check if array is null on the connected component */ 2992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2993 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2994 if (real_value > 0.0) { /* keep indices and values */ 2995 temp_constraints++; 2996 total_counts++; 2997 if (!idxs_copied) { 2998 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2999 idxs_copied = PETSC_TRUE; 3000 } 3001 } 3002 } 3003 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3004 valid_constraints = temp_constraints; 3005 if (!pcbddc->use_nnsp_true && temp_constraints) { 3006 if (temp_constraints == 1) { /* just normalize the constraint */ 3007 PetscScalar norm,*ptr_to_data; 3008 3009 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3011 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3012 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3013 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3014 } else { /* perform SVD */ 3015 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3016 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3017 3018 #if defined(PETSC_MISSING_LAPACK_GESVD) 3019 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3020 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3021 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3022 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3023 from that computed using LAPACKgesvd 3024 -> This is due to a different computation of eigenvectors in LAPACKheev 3025 -> The quality of the POD-computed basis will be the same */ 3026 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3027 /* Store upper triangular part of correlation matrix */ 3028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3029 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3030 for (j=0;j<temp_constraints;j++) { 3031 for (k=0;k<j+1;k++) { 3032 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3033 } 3034 } 3035 /* compute eigenvalues and eigenvectors of correlation matrix */ 3036 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3037 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3038 #if !defined(PETSC_USE_COMPLEX) 3039 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3040 #else 3041 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3042 #endif 3043 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3044 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3045 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3046 j = 0; 3047 while (j < temp_constraints && singular_vals[j] < tol) j++; 3048 total_counts = total_counts-j; 3049 valid_constraints = temp_constraints-j; 3050 /* scale and copy POD basis into used quadrature memory */ 3051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3052 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3053 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3055 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3057 if (j<temp_constraints) { 3058 PetscInt ii; 3059 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3061 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3062 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3063 for (k=0;k<temp_constraints-j;k++) { 3064 for (ii=0;ii<size_of_constraint;ii++) { 3065 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3066 } 3067 } 3068 } 3069 #else /* on missing GESVD */ 3070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3071 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3072 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3074 #if !defined(PETSC_USE_COMPLEX) 3075 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3076 #else 3077 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3078 #endif 3079 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3080 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3081 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3082 k = temp_constraints; 3083 if (k > size_of_constraint) k = size_of_constraint; 3084 j = 0; 3085 while (j < k && singular_vals[k-j-1] < tol) j++; 3086 valid_constraints = k-j; 3087 total_counts = total_counts-temp_constraints+valid_constraints; 3088 #endif /* on missing GESVD */ 3089 } 3090 } 3091 /* update pointers information */ 3092 if (valid_constraints) { 3093 constraints_n[total_counts_cc] = valid_constraints; 3094 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3095 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3096 /* set change_of_basis flag */ 3097 if (boolforchange) { 3098 PetscBTSet(change_basis,total_counts_cc); 3099 } 3100 total_counts_cc++; 3101 } 3102 } 3103 /* free workspace */ 3104 if (!skip_lapack) { 3105 ierr = PetscFree(work);CHKERRQ(ierr); 3106 #if defined(PETSC_USE_COMPLEX) 3107 ierr = PetscFree(rwork);CHKERRQ(ierr); 3108 #endif 3109 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3110 #if defined(PETSC_MISSING_LAPACK_GESVD) 3111 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3112 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3113 #endif 3114 } 3115 for (k=0;k<nnsp_size;k++) { 3116 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3117 } 3118 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3119 /* free index sets of faces, edges and vertices */ 3120 for (i=0;i<n_ISForFaces;i++) { 3121 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3122 } 3123 if (n_ISForFaces) { 3124 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3125 } 3126 for (i=0;i<n_ISForEdges;i++) { 3127 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3128 } 3129 if (n_ISForEdges) { 3130 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3131 } 3132 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3133 } else { 3134 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3135 3136 total_counts = 0; 3137 n_vertices = 0; 3138 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3139 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3140 } 3141 max_constraints = 0; 3142 total_counts_cc = 0; 3143 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3144 total_counts += pcbddc->adaptive_constraints_n[i]; 3145 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3146 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3147 } 3148 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3149 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3150 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3151 constraints_data = pcbddc->adaptive_constraints_data; 3152 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3153 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3154 total_counts_cc = 0; 3155 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3156 if (pcbddc->adaptive_constraints_n[i]) { 3157 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3158 } 3159 } 3160 #if 0 3161 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3162 for (i=0;i<total_counts_cc;i++) { 3163 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3164 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3165 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3166 printf(" %d",constraints_idxs[j]); 3167 } 3168 printf("\n"); 3169 printf("number of cc: %d\n",constraints_n[i]); 3170 } 3171 for (i=0;i<n_vertices;i++) { 3172 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3173 } 3174 for (i=0;i<sub_schurs->n_subs;i++) { 3175 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3176 } 3177 #endif 3178 3179 max_size_of_constraint = 0; 3180 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3181 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3182 /* Change of basis */ 3183 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3184 if (pcbddc->use_change_of_basis) { 3185 for (i=0;i<sub_schurs->n_subs;i++) { 3186 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3187 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3188 } 3189 } 3190 } 3191 } 3192 pcbddc->local_primal_size = total_counts; 3193 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3194 3195 /* map constraints_idxs in boundary numbering */ 3196 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3197 if (i != constraints_idxs_ptr[total_counts_cc]) { 3198 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3199 } 3200 3201 /* Create constraint matrix */ 3202 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3203 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3204 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3205 3206 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3207 /* determine if a QR strategy is needed for change of basis */ 3208 qr_needed = PETSC_FALSE; 3209 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3210 total_primal_vertices=0; 3211 pcbddc->local_primal_size_cc = 0; 3212 for (i=0;i<total_counts_cc;i++) { 3213 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3214 if (size_of_constraint == 1) { 3215 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3216 pcbddc->local_primal_size_cc += 1; 3217 } else if (PetscBTLookup(change_basis,i)) { 3218 for (k=0;k<constraints_n[i];k++) { 3219 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3220 } 3221 pcbddc->local_primal_size_cc += constraints_n[i]; 3222 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3223 PetscBTSet(qr_needed_idx,i); 3224 qr_needed = PETSC_TRUE; 3225 } 3226 } else { 3227 pcbddc->local_primal_size_cc += 1; 3228 } 3229 } 3230 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3231 pcbddc->n_vertices = total_primal_vertices; 3232 /* permute indices in order to have a sorted set of vertices */ 3233 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3234 3235 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3236 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3237 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3238 3239 /* nonzero structure of constraint matrix */ 3240 /* and get reference dof for local constraints */ 3241 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3242 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3243 3244 j = total_primal_vertices; 3245 total_counts = total_primal_vertices; 3246 cum = total_primal_vertices; 3247 for (i=n_vertices;i<total_counts_cc;i++) { 3248 if (!PetscBTLookup(change_basis,i)) { 3249 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3250 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3251 cum++; 3252 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3253 for (k=0;k<constraints_n[i];k++) { 3254 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3255 nnz[j+k] = size_of_constraint; 3256 } 3257 j += constraints_n[i]; 3258 } 3259 } 3260 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3261 ierr = PetscFree(nnz);CHKERRQ(ierr); 3262 3263 /* set values in constraint matrix */ 3264 for (i=0;i<total_primal_vertices;i++) { 3265 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3266 } 3267 total_counts = total_primal_vertices; 3268 for (i=n_vertices;i<total_counts_cc;i++) { 3269 if (!PetscBTLookup(change_basis,i)) { 3270 PetscInt *cols; 3271 3272 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3273 cols = constraints_idxs+constraints_idxs_ptr[i]; 3274 for (k=0;k<constraints_n[i];k++) { 3275 PetscInt row = total_counts+k; 3276 PetscScalar *vals; 3277 3278 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3279 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3280 } 3281 total_counts += constraints_n[i]; 3282 } 3283 } 3284 /* assembling */ 3285 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 3288 /* 3289 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3290 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3291 */ 3292 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3293 if (pcbddc->use_change_of_basis) { 3294 /* dual and primal dofs on a single cc */ 3295 PetscInt dual_dofs,primal_dofs; 3296 /* working stuff for GEQRF */ 3297 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3298 PetscBLASInt lqr_work; 3299 /* working stuff for UNGQR */ 3300 PetscScalar *gqr_work,lgqr_work_t; 3301 PetscBLASInt lgqr_work; 3302 /* working stuff for TRTRS */ 3303 PetscScalar *trs_rhs; 3304 PetscBLASInt Blas_NRHS; 3305 /* pointers for values insertion into change of basis matrix */ 3306 PetscInt *start_rows,*start_cols; 3307 PetscScalar *start_vals; 3308 /* working stuff for values insertion */ 3309 PetscBT is_primal; 3310 PetscInt *aux_primal_numbering_B; 3311 /* matrix sizes */ 3312 PetscInt global_size,local_size; 3313 /* temporary change of basis */ 3314 Mat localChangeOfBasisMatrix; 3315 /* extra space for debugging */ 3316 PetscScalar *dbg_work; 3317 3318 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3319 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3320 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3321 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3322 /* nonzeros for local mat */ 3323 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3324 for (i=0;i<pcis->n;i++) nnz[i]=1; 3325 for (i=n_vertices;i<total_counts_cc;i++) { 3326 if (PetscBTLookup(change_basis,i)) { 3327 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3328 if (PetscBTLookup(qr_needed_idx,i)) { 3329 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3330 } else { 3331 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3332 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3333 } 3334 } 3335 } 3336 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3337 ierr = PetscFree(nnz);CHKERRQ(ierr); 3338 /* Set initial identity in the matrix */ 3339 for (i=0;i<pcis->n;i++) { 3340 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3341 } 3342 3343 if (pcbddc->dbg_flag) { 3344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3346 } 3347 3348 3349 /* Now we loop on the constraints which need a change of basis */ 3350 /* 3351 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3352 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3353 3354 Basic blocks of change of basis matrix T computed by 3355 3356 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3357 3358 | 1 0 ... 0 s_1/S | 3359 | 0 1 ... 0 s_2/S | 3360 | ... | 3361 | 0 ... 1 s_{n-1}/S | 3362 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3363 3364 with S = \sum_{i=1}^n s_i^2 3365 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3366 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3367 3368 - QR decomposition of constraints otherwise 3369 */ 3370 if (qr_needed) { 3371 /* space to store Q */ 3372 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3373 /* first we issue queries for optimal work */ 3374 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3375 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3376 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3377 lqr_work = -1; 3378 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3380 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3381 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3382 lgqr_work = -1; 3383 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3384 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3385 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3386 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3387 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3388 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3389 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3390 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3391 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3392 /* array to store scaling factors for reflectors */ 3393 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3394 /* array to store rhs and solution of triangular solver */ 3395 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3396 /* allocating workspace for check */ 3397 if (pcbddc->dbg_flag) { 3398 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3399 } 3400 } 3401 /* array to store whether a node is primal or not */ 3402 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3403 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3404 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3405 if (i != total_primal_vertices) { 3406 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3407 } 3408 for (i=0;i<total_primal_vertices;i++) { 3409 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3410 } 3411 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3412 3413 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3414 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3415 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3416 if (PetscBTLookup(change_basis,total_counts)) { 3417 /* get constraint info */ 3418 primal_dofs = constraints_n[total_counts]; 3419 dual_dofs = size_of_constraint-primal_dofs; 3420 3421 if (pcbddc->dbg_flag) { 3422 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3423 } 3424 3425 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3426 3427 /* copy quadrature constraints for change of basis check */ 3428 if (pcbddc->dbg_flag) { 3429 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3430 } 3431 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3432 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3433 3434 /* compute QR decomposition of constraints */ 3435 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3436 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3438 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3439 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3440 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3442 3443 /* explictly compute R^-T */ 3444 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3445 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3446 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3447 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3450 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3451 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3452 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3453 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3454 3455 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3456 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3458 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3459 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3460 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3461 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3462 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3463 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3464 3465 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3466 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3467 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3468 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3469 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3470 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3472 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3475 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3476 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3477 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3478 3479 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3480 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3481 /* insert cols for primal dofs */ 3482 for (j=0;j<primal_dofs;j++) { 3483 start_vals = &qr_basis[j*size_of_constraint]; 3484 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3485 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3486 } 3487 /* insert cols for dual dofs */ 3488 for (j=0,k=0;j<dual_dofs;k++) { 3489 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3490 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3491 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3492 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3493 j++; 3494 } 3495 } 3496 3497 /* check change of basis */ 3498 if (pcbddc->dbg_flag) { 3499 PetscInt ii,jj; 3500 PetscBool valid_qr=PETSC_TRUE; 3501 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3506 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3508 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3510 for (jj=0;jj<size_of_constraint;jj++) { 3511 for (ii=0;ii<primal_dofs;ii++) { 3512 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3513 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3514 } 3515 } 3516 if (!valid_qr) { 3517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3518 for (jj=0;jj<size_of_constraint;jj++) { 3519 for (ii=0;ii<primal_dofs;ii++) { 3520 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3521 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3522 } 3523 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3524 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3525 } 3526 } 3527 } 3528 } else { 3529 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3530 } 3531 } 3532 } else { /* simple transformation block */ 3533 PetscInt row,col; 3534 PetscScalar val,norm; 3535 3536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3537 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3538 for (j=0;j<size_of_constraint;j++) { 3539 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3540 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3541 if (!PetscBTLookup(is_primal,row_B)) { 3542 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3543 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3544 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3545 } else { 3546 for (k=0;k<size_of_constraint;k++) { 3547 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3548 if (row != col) { 3549 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3550 } else { 3551 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3552 } 3553 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3554 } 3555 } 3556 } 3557 if (pcbddc->dbg_flag) { 3558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3559 } 3560 } 3561 } else { 3562 if (pcbddc->dbg_flag) { 3563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3564 } 3565 } 3566 } 3567 3568 /* free workspace */ 3569 if (qr_needed) { 3570 if (pcbddc->dbg_flag) { 3571 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3572 } 3573 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3574 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3575 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3576 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3577 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3578 } 3579 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3580 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3581 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 3583 /* assembling of global change of variable */ 3584 { 3585 Mat tmat; 3586 PetscInt bs; 3587 3588 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3589 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3590 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3591 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3592 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3593 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3594 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3595 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3596 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3597 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3598 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3599 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3600 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3601 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3602 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3603 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3604 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3605 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3606 } 3607 /* check */ 3608 if (pcbddc->dbg_flag) { 3609 PetscReal error; 3610 Vec x,x_change; 3611 3612 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3613 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3614 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3615 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3616 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3617 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3618 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3619 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3620 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3621 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3622 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3623 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3625 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3626 ierr = VecDestroy(&x);CHKERRQ(ierr); 3627 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3628 } 3629 3630 /* adapt sub_schurs computed (if any) */ 3631 if (pcbddc->use_deluxe_scaling) { 3632 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3633 if (sub_schurs->S_Ej_all) { 3634 Mat S_new,tmat; 3635 ISLocalToGlobalMapping NtoSall; 3636 IS is_all_N,is_V,is_V_Sall; 3637 const PetscScalar *array; 3638 const PetscInt *idxs_V,*idxs_all; 3639 PetscInt i,n_V; 3640 3641 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3642 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3644 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3645 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3646 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3647 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3648 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3649 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3650 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3651 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3652 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3653 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3654 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3655 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3656 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3657 for (i=0;i<n_V;i++) { 3658 PetscScalar val; 3659 PetscInt idx; 3660 3661 idx = idxs_V[i]; 3662 val = array[idxs_all[idxs_V[i]]]; 3663 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3664 } 3665 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3666 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3667 sub_schurs->S_Ej_all = S_new; 3668 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3669 if (sub_schurs->sum_S_Ej_all) { 3670 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3671 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3672 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3673 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3674 sub_schurs->sum_S_Ej_all = S_new; 3675 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3676 } 3677 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3678 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3679 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3680 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3681 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3682 } 3683 } 3684 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3685 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3686 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3687 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3688 } 3689 3690 /* set up change of basis context */ 3691 if (pcbddc->ChangeOfBasisMatrix) { 3692 PCBDDCChange_ctx change_ctx; 3693 3694 if (!pcbddc->new_global_mat) { 3695 PetscInt global_size,local_size; 3696 3697 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3698 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3699 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3700 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3701 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3702 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3703 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3704 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3705 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3706 } else { 3707 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3708 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3709 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3710 } 3711 if (!pcbddc->user_ChangeOfBasisMatrix) { 3712 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3713 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3714 } else { 3715 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3716 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3717 } 3718 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3719 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3720 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3721 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 } 3723 3724 /* add pressure dofs to set of primal nodes for numbering purposes */ 3725 for (i=0;i<pcbddc->benign_n;i++) { 3726 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3727 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3728 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3729 pcbddc->local_primal_size_cc++; 3730 pcbddc->local_primal_size++; 3731 } 3732 3733 /* check if a new primal space has been introduced (also take into account benign trick) */ 3734 pcbddc->new_primal_space_local = PETSC_TRUE; 3735 if (olocal_primal_size == pcbddc->local_primal_size) { 3736 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3737 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3738 if (!pcbddc->new_primal_space_local) { 3739 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3740 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3741 } 3742 } 3743 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3744 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3745 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3746 3747 /* flush dbg viewer */ 3748 if (pcbddc->dbg_flag) { 3749 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3750 } 3751 3752 /* free workspace */ 3753 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3754 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3755 if (!pcbddc->adaptive_selection) { 3756 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3757 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3758 } else { 3759 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3760 pcbddc->adaptive_constraints_idxs_ptr, 3761 pcbddc->adaptive_constraints_data_ptr, 3762 pcbddc->adaptive_constraints_idxs, 3763 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3764 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3765 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3766 } 3767 PetscFunctionReturn(0); 3768 } 3769 3770 #undef __FUNCT__ 3771 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3772 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3773 { 3774 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3775 PC_IS *pcis = (PC_IS*)pc->data; 3776 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3777 PetscInt ierr,i,vertex_size,N; 3778 PetscViewer viewer=pcbddc->dbg_viewer; 3779 3780 PetscFunctionBegin; 3781 /* Reset previously computed graph */ 3782 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3783 /* Init local Graph struct */ 3784 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3785 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3786 3787 /* Check validity of the csr graph passed in by the user */ 3788 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3789 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3790 } 3791 3792 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3793 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3794 PetscInt *xadj,*adjncy; 3795 PetscInt nvtxs; 3796 PetscBool flg_row=PETSC_FALSE; 3797 3798 if (pcbddc->use_local_adj) { 3799 3800 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3801 if (flg_row) { 3802 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3803 pcbddc->computed_rowadj = PETSC_TRUE; 3804 } 3805 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3806 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3807 IS is_dummy; 3808 ISLocalToGlobalMapping l2gmap_dummy; 3809 PetscInt j,sum; 3810 PetscInt *cxadj,*cadjncy; 3811 const PetscInt *idxs; 3812 PCBDDCGraph graph; 3813 PetscBT is_on_boundary; 3814 3815 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3816 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3817 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3818 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3819 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3820 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3821 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3822 if (flg_row) { 3823 graph->xadj = xadj; 3824 graph->adjncy = adjncy; 3825 } 3826 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3827 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3828 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3829 3830 if (pcbddc->dbg_flag) { 3831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3832 for (i=0;i<graph->ncc;i++) { 3833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3834 } 3835 } 3836 3837 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3838 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3839 for (i=0;i<pcis->n_B;i++) { 3840 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3841 } 3842 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3843 3844 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3845 sum = 0; 3846 for (i=0;i<graph->ncc;i++) { 3847 PetscInt sizecc = 0; 3848 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3849 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3850 sizecc++; 3851 } 3852 } 3853 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3854 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3855 cxadj[graph->queue[j]] = sizecc; 3856 } 3857 } 3858 sum += sizecc*sizecc; 3859 } 3860 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3861 sum = 0; 3862 for (i=0;i<pcis->n;i++) { 3863 PetscInt temp = cxadj[i]; 3864 cxadj[i] = sum; 3865 sum += temp; 3866 } 3867 cxadj[pcis->n] = sum; 3868 for (i=0;i<graph->ncc;i++) { 3869 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3870 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3871 PetscInt k,sizecc = 0; 3872 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3873 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3874 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3875 sizecc++; 3876 } 3877 } 3878 } 3879 } 3880 } 3881 if (sum) { 3882 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3883 } else { 3884 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3885 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3886 } 3887 graph->xadj = 0; 3888 graph->adjncy = 0; 3889 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3890 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3891 } 3892 } 3893 if (pcbddc->dbg_flag) { 3894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3895 } 3896 3897 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3898 vertex_size = 1; 3899 if (pcbddc->user_provided_isfordofs) { 3900 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3901 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3902 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3903 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3904 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3905 } 3906 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3907 pcbddc->n_ISForDofs = 0; 3908 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3909 } 3910 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3911 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3912 } else { 3913 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3914 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3915 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3916 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3917 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3918 } 3919 } 3920 } 3921 3922 /* Setup of Graph */ 3923 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3924 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3925 } 3926 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3927 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3928 } 3929 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3930 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3931 } 3932 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3933 3934 /* attach info on disconnected subdomains if present */ 3935 if (pcbddc->n_local_subs) { 3936 PetscInt *local_subs; 3937 3938 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3939 for (i=0;i<pcbddc->n_local_subs;i++) { 3940 const PetscInt *idxs; 3941 PetscInt nl,j; 3942 3943 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3944 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3945 for (j=0;j<nl;j++) { 3946 local_subs[idxs[j]] = i; 3947 } 3948 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3949 } 3950 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3951 pcbddc->mat_graph->local_subs = local_subs; 3952 } 3953 3954 /* Graph's connected components analysis */ 3955 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3956 3957 /* print some info to stdout */ 3958 if (pcbddc->dbg_flag) { 3959 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3960 } 3961 3962 /* mark topography has done */ 3963 pcbddc->recompute_topography = PETSC_FALSE; 3964 PetscFunctionReturn(0); 3965 } 3966 3967 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "PCBDDCSubsetNumbering" 3970 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3971 { 3972 PetscSF sf; 3973 PetscLayout map; 3974 const PetscInt *idxs; 3975 PetscInt *leaf_data,*root_data,*gidxs; 3976 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3977 PetscInt n_n,nlocals,start,first_index; 3978 PetscMPIInt commsize; 3979 PetscBool first_found; 3980 PetscErrorCode ierr; 3981 3982 PetscFunctionBegin; 3983 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3984 if (subset_mult) { 3985 PetscCheckSameComm(subset,1,subset_mult,2); 3986 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3987 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3988 } 3989 /* create workspace layout for computing global indices of subset */ 3990 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3991 lbounds[0] = lbounds[1] = 0; 3992 for (i=0;i<n;i++) { 3993 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3994 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3995 } 3996 lbounds[0] = -lbounds[0]; 3997 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3998 gbounds[0] = -gbounds[0]; 3999 N = gbounds[1] - gbounds[0] + 1; 4000 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4001 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4002 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4003 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4004 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4005 4006 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4007 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4008 if (subset_mult) { 4009 const PetscInt* idxs_mult; 4010 4011 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4012 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4013 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4014 } else { 4015 for (i=0;i<n;i++) leaf_data[i] = 1; 4016 } 4017 /* local size of new subset */ 4018 n_n = 0; 4019 for (i=0;i<n;i++) n_n += leaf_data[i]; 4020 4021 /* global indexes in layout */ 4022 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4023 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4024 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4025 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4026 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4027 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4028 4029 /* reduce from leaves to roots */ 4030 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4031 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4032 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4033 4034 /* count indexes in local part of layout */ 4035 nlocals = 0; 4036 first_index = -1; 4037 first_found = PETSC_FALSE; 4038 for (i=0;i<Nl;i++) { 4039 if (!first_found && root_data[i]) { 4040 first_found = PETSC_TRUE; 4041 first_index = i; 4042 } 4043 nlocals += root_data[i]; 4044 } 4045 4046 /* cumulative of number of indexes and size of subset without holes */ 4047 #if defined(PETSC_HAVE_MPI_EXSCAN) 4048 start = 0; 4049 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4050 #else 4051 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4052 start = start-nlocals; 4053 #endif 4054 4055 if (N_n) { /* compute total size of new subset if requested */ 4056 *N_n = start + nlocals; 4057 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4058 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4059 } 4060 4061 /* adapt root data with cumulative */ 4062 if (first_found) { 4063 PetscInt old_index; 4064 4065 root_data[first_index] += start; 4066 old_index = first_index; 4067 for (i=first_index+1;i<Nl;i++) { 4068 if (root_data[i]) { 4069 root_data[i] += root_data[old_index]; 4070 old_index = i; 4071 } 4072 } 4073 } 4074 4075 /* from roots to leaves */ 4076 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4077 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4078 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4079 4080 /* create new IS with global indexes without holes */ 4081 if (subset_mult) { 4082 const PetscInt* idxs_mult; 4083 PetscInt cum; 4084 4085 cum = 0; 4086 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4087 for (i=0;i<n;i++) { 4088 PetscInt j; 4089 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4090 } 4091 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4092 } else { 4093 for (i=0;i<n;i++) { 4094 gidxs[i] = leaf_data[i]-1; 4095 } 4096 } 4097 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4098 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4099 PetscFunctionReturn(0); 4100 } 4101 4102 #undef __FUNCT__ 4103 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4104 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4105 { 4106 PetscInt i,j; 4107 PetscScalar *alphas; 4108 PetscErrorCode ierr; 4109 4110 PetscFunctionBegin; 4111 /* this implements stabilized Gram-Schmidt */ 4112 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4113 for (i=0;i<n;i++) { 4114 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4115 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4116 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4117 } 4118 ierr = PetscFree(alphas);CHKERRQ(ierr); 4119 PetscFunctionReturn(0); 4120 } 4121 4122 #undef __FUNCT__ 4123 #define __FUNCT__ "MatISGetSubassemblingPattern" 4124 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4125 { 4126 IS ranks_send_to; 4127 PetscInt n_neighs,*neighs,*n_shared,**shared; 4128 PetscMPIInt size,rank,color; 4129 PetscInt *xadj,*adjncy; 4130 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4131 PetscInt i,local_size,threshold=0; 4132 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4133 PetscSubcomm subcomm; 4134 PetscErrorCode ierr; 4135 4136 PetscFunctionBegin; 4137 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4138 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4139 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4140 4141 /* Get info on mapping */ 4142 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4143 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4144 4145 /* build local CSR graph of subdomains' connectivity */ 4146 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4147 xadj[0] = 0; 4148 xadj[1] = PetscMax(n_neighs-1,0); 4149 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4150 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4151 4152 if (threshold) { 4153 PetscInt xadj_count = 0; 4154 for (i=1;i<n_neighs;i++) { 4155 if (n_shared[i] > threshold) { 4156 adjncy[xadj_count] = neighs[i]; 4157 adjncy_wgt[xadj_count] = n_shared[i]; 4158 xadj_count++; 4159 } 4160 } 4161 xadj[1] = xadj_count; 4162 } else { 4163 if (xadj[1]) { 4164 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4165 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4166 } 4167 } 4168 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4169 if (use_square) { 4170 for (i=0;i<xadj[1];i++) { 4171 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4172 } 4173 } 4174 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4175 4176 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4177 4178 /* 4179 Restrict work on active processes only. 4180 */ 4181 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4182 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4183 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4184 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4185 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4186 if (color) { 4187 ierr = PetscFree(xadj);CHKERRQ(ierr); 4188 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4189 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4190 } else { 4191 Mat subdomain_adj; 4192 IS new_ranks,new_ranks_contig; 4193 MatPartitioning partitioner; 4194 PetscInt prank,rstart=0,rend=0; 4195 PetscInt *is_indices,*oldranks; 4196 PetscBool aggregate; 4197 4198 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4199 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4200 prank = rank; 4201 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4202 /* 4203 for (i=0;i<size;i++) { 4204 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4205 } 4206 */ 4207 for (i=0;i<xadj[1];i++) { 4208 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4209 } 4210 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4211 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4212 if (aggregate) { 4213 PetscInt lrows,row,ncols,*cols; 4214 PetscMPIInt nrank; 4215 PetscScalar *vals; 4216 4217 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4218 lrows = 0; 4219 if (nrank<redprocs) { 4220 lrows = size/redprocs; 4221 if (nrank<size%redprocs) lrows++; 4222 } 4223 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4224 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4225 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4226 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4227 row = nrank; 4228 ncols = xadj[1]-xadj[0]; 4229 cols = adjncy; 4230 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4231 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4232 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4233 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4234 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 ierr = PetscFree(xadj);CHKERRQ(ierr); 4236 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4237 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4238 ierr = PetscFree(vals);CHKERRQ(ierr); 4239 } else { 4240 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4241 } 4242 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4243 4244 /* Partition */ 4245 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4246 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4247 if (use_vwgt) { 4248 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4249 v_wgt[0] = local_size; 4250 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4251 } 4252 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4253 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4254 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4255 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4256 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4257 4258 /* renumber new_ranks to avoid "holes" in new set of processors */ 4259 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4260 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4261 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4262 if (!redprocs) { 4263 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4264 } else { 4265 PetscInt idxs[1]; 4266 PetscMPIInt tag; 4267 MPI_Request *reqs; 4268 4269 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4270 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4271 for (i=rstart;i<rend;i++) { 4272 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4273 } 4274 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4275 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4276 ierr = PetscFree(reqs);CHKERRQ(ierr); 4277 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4278 } 4279 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4280 /* clean up */ 4281 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4282 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4283 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4284 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4285 } 4286 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4287 4288 /* assemble parallel IS for sends */ 4289 i = 1; 4290 if (color) i=0; 4291 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4292 /* get back IS */ 4293 *is_sends = ranks_send_to; 4294 PetscFunctionReturn(0); 4295 } 4296 4297 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4298 4299 #undef __FUNCT__ 4300 #define __FUNCT__ "MatISSubassemble" 4301 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4302 { 4303 Mat local_mat; 4304 IS is_sends_internal; 4305 PetscInt rows,cols,new_local_rows; 4306 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4307 PetscBool ismatis,isdense,newisdense,destroy_mat; 4308 ISLocalToGlobalMapping l2gmap; 4309 PetscInt* l2gmap_indices; 4310 const PetscInt* is_indices; 4311 MatType new_local_type; 4312 /* buffers */ 4313 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4314 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4315 PetscInt *recv_buffer_idxs_local; 4316 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4317 /* MPI */ 4318 MPI_Comm comm,comm_n; 4319 PetscSubcomm subcomm; 4320 PetscMPIInt n_sends,n_recvs,commsize; 4321 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4322 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4323 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4324 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4325 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4326 PetscErrorCode ierr; 4327 4328 PetscFunctionBegin; 4329 /* TODO: add missing checks */ 4330 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4331 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4332 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4333 PetscValidLogicalCollectiveInt(mat,nis,7); 4334 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4335 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4336 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4337 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4338 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4339 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4340 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4341 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4342 PetscInt mrows,mcols,mnrows,mncols; 4343 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4344 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4345 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4346 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4347 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4348 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4349 } 4350 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4351 PetscValidLogicalCollectiveInt(mat,bs,0); 4352 /* prepare IS for sending if not provided */ 4353 if (!is_sends) { 4354 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4355 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4356 } else { 4357 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4358 is_sends_internal = is_sends; 4359 } 4360 4361 /* get comm */ 4362 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4363 4364 /* compute number of sends */ 4365 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4366 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4367 4368 /* compute number of receives */ 4369 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4370 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4371 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4372 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4373 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4374 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4375 ierr = PetscFree(iflags);CHKERRQ(ierr); 4376 4377 /* restrict comm if requested */ 4378 subcomm = 0; 4379 destroy_mat = PETSC_FALSE; 4380 if (restrict_comm) { 4381 PetscMPIInt color,subcommsize; 4382 4383 color = 0; 4384 if (restrict_full) { 4385 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4386 } else { 4387 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4388 } 4389 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4390 subcommsize = commsize - subcommsize; 4391 /* check if reuse has been requested */ 4392 if (reuse == MAT_REUSE_MATRIX) { 4393 if (*mat_n) { 4394 PetscMPIInt subcommsize2; 4395 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4396 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4397 comm_n = PetscObjectComm((PetscObject)*mat_n); 4398 } else { 4399 comm_n = PETSC_COMM_SELF; 4400 } 4401 } else { /* MAT_INITIAL_MATRIX */ 4402 PetscMPIInt rank; 4403 4404 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4405 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4406 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4407 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4408 comm_n = PetscSubcommChild(subcomm); 4409 } 4410 /* flag to destroy *mat_n if not significative */ 4411 if (color) destroy_mat = PETSC_TRUE; 4412 } else { 4413 comm_n = comm; 4414 } 4415 4416 /* prepare send/receive buffers */ 4417 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4418 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4419 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4420 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4421 if (nis) { 4422 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4423 } 4424 4425 /* Get data from local matrices */ 4426 if (!isdense) { 4427 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4428 /* TODO: See below some guidelines on how to prepare the local buffers */ 4429 /* 4430 send_buffer_vals should contain the raw values of the local matrix 4431 send_buffer_idxs should contain: 4432 - MatType_PRIVATE type 4433 - PetscInt size_of_l2gmap 4434 - PetscInt global_row_indices[size_of_l2gmap] 4435 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4436 */ 4437 } else { 4438 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4439 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4440 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4441 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4442 send_buffer_idxs[1] = i; 4443 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4444 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4445 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4446 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4447 for (i=0;i<n_sends;i++) { 4448 ilengths_vals[is_indices[i]] = len*len; 4449 ilengths_idxs[is_indices[i]] = len+2; 4450 } 4451 } 4452 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4453 /* additional is (if any) */ 4454 if (nis) { 4455 PetscMPIInt psum; 4456 PetscInt j; 4457 for (j=0,psum=0;j<nis;j++) { 4458 PetscInt plen; 4459 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4460 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4461 psum += len+1; /* indices + lenght */ 4462 } 4463 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4464 for (j=0,psum=0;j<nis;j++) { 4465 PetscInt plen; 4466 const PetscInt *is_array_idxs; 4467 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4468 send_buffer_idxs_is[psum] = plen; 4469 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4470 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4471 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4472 psum += plen+1; /* indices + lenght */ 4473 } 4474 for (i=0;i<n_sends;i++) { 4475 ilengths_idxs_is[is_indices[i]] = psum; 4476 } 4477 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4478 } 4479 4480 buf_size_idxs = 0; 4481 buf_size_vals = 0; 4482 buf_size_idxs_is = 0; 4483 for (i=0;i<n_recvs;i++) { 4484 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4485 buf_size_vals += (PetscInt)olengths_vals[i]; 4486 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4487 } 4488 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4489 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4490 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4491 4492 /* get new tags for clean communications */ 4493 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4494 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4495 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4496 4497 /* allocate for requests */ 4498 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4499 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4500 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4501 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4502 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4503 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4504 4505 /* communications */ 4506 ptr_idxs = recv_buffer_idxs; 4507 ptr_vals = recv_buffer_vals; 4508 ptr_idxs_is = recv_buffer_idxs_is; 4509 for (i=0;i<n_recvs;i++) { 4510 source_dest = onodes[i]; 4511 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4512 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4513 ptr_idxs += olengths_idxs[i]; 4514 ptr_vals += olengths_vals[i]; 4515 if (nis) { 4516 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4517 ptr_idxs_is += olengths_idxs_is[i]; 4518 } 4519 } 4520 for (i=0;i<n_sends;i++) { 4521 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4522 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4523 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4524 if (nis) { 4525 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4526 } 4527 } 4528 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4529 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4530 4531 /* assemble new l2g map */ 4532 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4533 ptr_idxs = recv_buffer_idxs; 4534 new_local_rows = 0; 4535 for (i=0;i<n_recvs;i++) { 4536 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4537 ptr_idxs += olengths_idxs[i]; 4538 } 4539 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4540 ptr_idxs = recv_buffer_idxs; 4541 new_local_rows = 0; 4542 for (i=0;i<n_recvs;i++) { 4543 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4544 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4545 ptr_idxs += olengths_idxs[i]; 4546 } 4547 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4548 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4549 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4550 4551 /* infer new local matrix type from received local matrices type */ 4552 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4553 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4554 if (n_recvs) { 4555 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4556 ptr_idxs = recv_buffer_idxs; 4557 for (i=0;i<n_recvs;i++) { 4558 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4559 new_local_type_private = MATAIJ_PRIVATE; 4560 break; 4561 } 4562 ptr_idxs += olengths_idxs[i]; 4563 } 4564 switch (new_local_type_private) { 4565 case MATDENSE_PRIVATE: 4566 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4567 new_local_type = MATSEQAIJ; 4568 bs = 1; 4569 } else { /* if I receive only 1 dense matrix */ 4570 new_local_type = MATSEQDENSE; 4571 bs = 1; 4572 } 4573 break; 4574 case MATAIJ_PRIVATE: 4575 new_local_type = MATSEQAIJ; 4576 bs = 1; 4577 break; 4578 case MATBAIJ_PRIVATE: 4579 new_local_type = MATSEQBAIJ; 4580 break; 4581 case MATSBAIJ_PRIVATE: 4582 new_local_type = MATSEQSBAIJ; 4583 break; 4584 default: 4585 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4586 break; 4587 } 4588 } else { /* by default, new_local_type is seqdense */ 4589 new_local_type = MATSEQDENSE; 4590 bs = 1; 4591 } 4592 4593 /* create MATIS object if needed */ 4594 if (reuse == MAT_INITIAL_MATRIX) { 4595 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4596 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4597 } else { 4598 /* it also destroys the local matrices */ 4599 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4600 } 4601 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4602 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4603 4604 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4605 4606 /* Global to local map of received indices */ 4607 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4608 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4609 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4610 4611 /* restore attributes -> type of incoming data and its size */ 4612 buf_size_idxs = 0; 4613 for (i=0;i<n_recvs;i++) { 4614 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4615 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4616 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4617 } 4618 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4619 4620 /* set preallocation */ 4621 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4622 if (!newisdense) { 4623 PetscInt *new_local_nnz=0; 4624 4625 ptr_vals = recv_buffer_vals; 4626 ptr_idxs = recv_buffer_idxs_local; 4627 if (n_recvs) { 4628 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4629 } 4630 for (i=0;i<n_recvs;i++) { 4631 PetscInt j; 4632 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4633 for (j=0;j<*(ptr_idxs+1);j++) { 4634 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4635 } 4636 } else { 4637 /* TODO */ 4638 } 4639 ptr_idxs += olengths_idxs[i]; 4640 } 4641 if (new_local_nnz) { 4642 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4643 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4644 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4645 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4646 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4647 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4648 } else { 4649 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4650 } 4651 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4652 } else { 4653 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4654 } 4655 4656 /* set values */ 4657 ptr_vals = recv_buffer_vals; 4658 ptr_idxs = recv_buffer_idxs_local; 4659 for (i=0;i<n_recvs;i++) { 4660 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4661 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4662 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4663 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4664 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4665 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4666 } else { 4667 /* TODO */ 4668 } 4669 ptr_idxs += olengths_idxs[i]; 4670 ptr_vals += olengths_vals[i]; 4671 } 4672 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4673 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4676 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4677 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4678 4679 #if 0 4680 if (!restrict_comm) { /* check */ 4681 Vec lvec,rvec; 4682 PetscReal infty_error; 4683 4684 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4685 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4686 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4687 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4688 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4689 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4690 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4691 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4692 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4693 } 4694 #endif 4695 4696 /* assemble new additional is (if any) */ 4697 if (nis) { 4698 PetscInt **temp_idxs,*count_is,j,psum; 4699 4700 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4701 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4702 ptr_idxs = recv_buffer_idxs_is; 4703 psum = 0; 4704 for (i=0;i<n_recvs;i++) { 4705 for (j=0;j<nis;j++) { 4706 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4707 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4708 psum += plen; 4709 ptr_idxs += plen+1; /* shift pointer to received data */ 4710 } 4711 } 4712 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4713 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4714 for (i=1;i<nis;i++) { 4715 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4716 } 4717 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4718 ptr_idxs = recv_buffer_idxs_is; 4719 for (i=0;i<n_recvs;i++) { 4720 for (j=0;j<nis;j++) { 4721 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4722 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4723 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4724 ptr_idxs += plen+1; /* shift pointer to received data */ 4725 } 4726 } 4727 for (i=0;i<nis;i++) { 4728 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4729 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4730 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4731 } 4732 ierr = PetscFree(count_is);CHKERRQ(ierr); 4733 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4734 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4735 } 4736 /* free workspace */ 4737 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4738 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4739 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4740 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4741 if (isdense) { 4742 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4743 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4744 } else { 4745 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4746 } 4747 if (nis) { 4748 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4749 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4750 } 4751 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4752 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4753 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4754 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4755 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4756 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4757 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4758 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4759 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4760 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4761 ierr = PetscFree(onodes);CHKERRQ(ierr); 4762 if (nis) { 4763 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4764 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4765 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4766 } 4767 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4768 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4769 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4770 for (i=0;i<nis;i++) { 4771 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4772 } 4773 *mat_n = NULL; 4774 } 4775 PetscFunctionReturn(0); 4776 } 4777 4778 /* temporary hack into ksp private data structure */ 4779 #include <petsc/private/kspimpl.h> 4780 4781 #undef __FUNCT__ 4782 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4783 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4784 { 4785 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4786 PC_IS *pcis = (PC_IS*)pc->data; 4787 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4788 MatNullSpace CoarseNullSpace=NULL; 4789 ISLocalToGlobalMapping coarse_islg; 4790 IS coarse_is,*isarray; 4791 PetscInt i,im_active=-1,active_procs=-1; 4792 PetscInt nis,nisdofs,nisneu,nisvert; 4793 PC pc_temp; 4794 PCType coarse_pc_type; 4795 KSPType coarse_ksp_type; 4796 PetscBool multilevel_requested,multilevel_allowed; 4797 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4798 Mat t_coarse_mat_is; 4799 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4800 PetscMPIInt all_procs; 4801 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4802 PetscBool compute_vecs = PETSC_FALSE; 4803 PetscScalar *array; 4804 PetscErrorCode ierr; 4805 4806 PetscFunctionBegin; 4807 /* Assign global numbering to coarse dofs */ 4808 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4809 PetscInt ocoarse_size; 4810 compute_vecs = PETSC_TRUE; 4811 ocoarse_size = pcbddc->coarse_size; 4812 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4813 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4814 /* see if we can avoid some work */ 4815 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4816 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4817 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4818 PC pc; 4819 PetscBool isbddc; 4820 4821 /* temporary workaround since PCBDDC does not have a reset method so far */ 4822 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4823 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4824 if (isbddc) { 4825 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4826 } 4827 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4828 coarse_reuse = PETSC_FALSE; 4829 } else { /* we can safely reuse already computed coarse matrix */ 4830 coarse_reuse = PETSC_TRUE; 4831 } 4832 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4833 coarse_reuse = PETSC_FALSE; 4834 } 4835 /* reset any subassembling information */ 4836 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4837 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4838 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4839 coarse_reuse = PETSC_TRUE; 4840 } 4841 4842 /* count "active" (i.e. with positive local size) and "void" processes */ 4843 im_active = !!(pcis->n); 4844 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4845 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4846 void_procs = all_procs-active_procs; 4847 csin_type_simple = PETSC_TRUE; 4848 redist = PETSC_FALSE; 4849 if (pcbddc->current_level && void_procs) { 4850 csin_ml = PETSC_TRUE; 4851 ncoarse_ml = void_procs; 4852 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4853 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4854 csin_ds = PETSC_TRUE; 4855 ncoarse_ds = pcbddc->redistribute_coarse; 4856 redist = PETSC_TRUE; 4857 } else { 4858 csin_ds = PETSC_TRUE; 4859 ncoarse_ds = active_procs; 4860 redist = PETSC_TRUE; 4861 } 4862 } else { 4863 csin_ml = PETSC_FALSE; 4864 ncoarse_ml = all_procs; 4865 if (void_procs) { 4866 csin_ds = PETSC_TRUE; 4867 ncoarse_ds = void_procs; 4868 csin_type_simple = PETSC_FALSE; 4869 } else { 4870 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4871 csin_ds = PETSC_TRUE; 4872 ncoarse_ds = pcbddc->redistribute_coarse; 4873 redist = PETSC_TRUE; 4874 } else { 4875 csin_ds = PETSC_FALSE; 4876 ncoarse_ds = all_procs; 4877 } 4878 } 4879 } 4880 4881 /* 4882 test if we can go multilevel: three conditions must be satisfied: 4883 - we have not exceeded the number of levels requested 4884 - we can actually subassemble the active processes 4885 - we can find a suitable number of MPI processes where we can place the subassembled problem 4886 */ 4887 multilevel_allowed = PETSC_FALSE; 4888 multilevel_requested = PETSC_FALSE; 4889 if (pcbddc->current_level < pcbddc->max_levels) { 4890 multilevel_requested = PETSC_TRUE; 4891 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4892 multilevel_allowed = PETSC_FALSE; 4893 } else { 4894 multilevel_allowed = PETSC_TRUE; 4895 } 4896 } 4897 /* determine number of process partecipating to coarse solver */ 4898 if (multilevel_allowed) { 4899 ncoarse = ncoarse_ml; 4900 csin = csin_ml; 4901 redist = PETSC_FALSE; 4902 } else { 4903 ncoarse = ncoarse_ds; 4904 csin = csin_ds; 4905 } 4906 4907 /* creates temporary l2gmap and IS for coarse indexes */ 4908 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4909 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4910 4911 /* creates temporary MATIS object for coarse matrix */ 4912 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4913 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4914 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4915 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4916 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4917 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4918 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4919 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4920 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4921 4922 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4923 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4924 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4925 const PetscInt *idxs; 4926 ISLocalToGlobalMapping tmap; 4927 4928 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4929 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4930 /* allocate space for temporary storage */ 4931 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4932 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4933 /* allocate for IS array */ 4934 nisdofs = pcbddc->n_ISForDofsLocal; 4935 nisneu = !!pcbddc->NeumannBoundariesLocal; 4936 nisvert = 0; 4937 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4938 nisvert = 1; 4939 } 4940 nis = nisdofs + nisneu + nisvert; 4941 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4942 /* dofs splitting */ 4943 for (i=0;i<nisdofs;i++) { 4944 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4945 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4946 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4947 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4948 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4949 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4950 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4951 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4952 } 4953 /* neumann boundaries */ 4954 if (pcbddc->NeumannBoundariesLocal) { 4955 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4956 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4957 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4958 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4959 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4960 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4961 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4962 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4963 } 4964 /* primal vertices (benign) */ 4965 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4966 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4967 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4968 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4969 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4970 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4971 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4972 } 4973 /* free memory */ 4974 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4975 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4976 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4977 } else { 4978 nis = 0; 4979 nisdofs = 0; 4980 nisneu = 0; 4981 nisvert = 0; 4982 isarray = NULL; 4983 } 4984 /* destroy no longer needed map */ 4985 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4986 4987 /* restrict on coarse candidates (if needed) */ 4988 coarse_mat_is = NULL; 4989 if (csin) { 4990 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4991 if (redist) { 4992 PetscMPIInt rank; 4993 PetscInt spc,n_spc_p1,dest[1],destsize; 4994 4995 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4996 spc = active_procs/ncoarse; 4997 n_spc_p1 = active_procs%ncoarse; 4998 if (im_active) { 4999 destsize = 1; 5000 if (rank > n_spc_p1*(spc+1)-1) { 5001 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5002 } else { 5003 dest[0] = rank/(spc+1); 5004 } 5005 } else { 5006 destsize = 0; 5007 } 5008 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5009 } else if (csin_type_simple) { 5010 PetscMPIInt rank; 5011 PetscInt issize,isidx; 5012 5013 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5014 if (im_active) { 5015 issize = 1; 5016 isidx = (PetscInt)rank; 5017 } else { 5018 issize = 0; 5019 isidx = -1; 5020 } 5021 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5022 } else { /* get a suitable subassembling pattern from MATIS code */ 5023 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5024 } 5025 5026 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5027 if (!redist || ncoarse <= void_procs) { 5028 PetscInt ncoarse_cand,tissize,*nisindices; 5029 PetscInt *coarse_candidates; 5030 const PetscInt* tisindices; 5031 5032 /* get coarse candidates' ranks in pc communicator */ 5033 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5034 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5035 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5036 if (!coarse_candidates[i]) { 5037 coarse_candidates[ncoarse_cand++]=i; 5038 } 5039 } 5040 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5041 5042 5043 if (pcbddc->dbg_flag) { 5044 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5046 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5047 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5048 for (i=0;i<ncoarse_cand;i++) { 5049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5050 } 5051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5052 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5053 } 5054 /* shift the pattern on coarse candidates */ 5055 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5056 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5057 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5058 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5059 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5060 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5061 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5062 } 5063 if (pcbddc->dbg_flag) { 5064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5066 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5068 } 5069 } 5070 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5071 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5072 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5073 } else { /* this is the last level, so use just receiving processes in subcomm */ 5074 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5075 } 5076 } else { 5077 if (pcbddc->dbg_flag) { 5078 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5079 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5080 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5081 } 5082 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5083 coarse_mat_is = t_coarse_mat_is; 5084 } 5085 5086 /* create local to global scatters for coarse problem */ 5087 if (compute_vecs) { 5088 PetscInt lrows; 5089 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5090 if (coarse_mat_is) { 5091 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5092 } else { 5093 lrows = 0; 5094 } 5095 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5096 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5097 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5098 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5099 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5100 } 5101 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5102 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5103 5104 /* set defaults for coarse KSP and PC */ 5105 if (multilevel_allowed) { 5106 coarse_ksp_type = KSPRICHARDSON; 5107 coarse_pc_type = PCBDDC; 5108 } else { 5109 coarse_ksp_type = KSPPREONLY; 5110 coarse_pc_type = PCREDUNDANT; 5111 } 5112 5113 /* print some info if requested */ 5114 if (pcbddc->dbg_flag) { 5115 if (!multilevel_allowed) { 5116 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5117 if (multilevel_requested) { 5118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5119 } else if (pcbddc->max_levels) { 5120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5121 } 5122 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5123 } 5124 } 5125 5126 /* create the coarse KSP object only once with defaults */ 5127 if (coarse_mat_is) { 5128 MatReuse coarse_mat_reuse; 5129 PetscViewer dbg_viewer = NULL; 5130 if (pcbddc->dbg_flag) { 5131 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5132 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5133 } 5134 if (!pcbddc->coarse_ksp) { 5135 char prefix[256],str_level[16]; 5136 size_t len; 5137 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5138 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5139 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5140 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5141 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5142 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5143 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5144 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5145 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5146 /* prefix */ 5147 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5148 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5149 if (!pcbddc->current_level) { 5150 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5151 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5152 } else { 5153 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5154 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5155 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5156 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5157 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5158 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5159 } 5160 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5161 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5162 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5163 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5164 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5165 /* allow user customization */ 5166 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5167 } 5168 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5169 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5170 if (nisdofs) { 5171 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5172 for (i=0;i<nisdofs;i++) { 5173 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5174 } 5175 } 5176 if (nisneu) { 5177 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5178 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5179 } 5180 if (nisvert) { 5181 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5182 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5183 } 5184 5185 /* get some info after set from options */ 5186 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5187 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5188 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5189 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5190 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5191 isbddc = PETSC_FALSE; 5192 } 5193 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5194 if (isredundant) { 5195 KSP inner_ksp; 5196 PC inner_pc; 5197 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5198 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5199 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5200 } 5201 5202 /* assemble coarse matrix */ 5203 if (coarse_reuse) { 5204 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5205 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5206 coarse_mat_reuse = MAT_REUSE_MATRIX; 5207 } else { 5208 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5209 } 5210 if (isbddc || isnn) { 5211 if (pcbddc->coarsening_ratio > 1) { 5212 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5213 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5214 if (pcbddc->dbg_flag) { 5215 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5216 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5217 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5218 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5219 } 5220 } 5221 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5222 } else { 5223 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5224 coarse_mat = coarse_mat_is; 5225 } 5226 } else { 5227 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5228 } 5229 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5230 5231 /* propagate symmetry info of coarse matrix */ 5232 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5233 if (pc->pmat->symmetric_set) { 5234 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5235 } 5236 if (pc->pmat->hermitian_set) { 5237 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5238 } 5239 if (pc->pmat->spd_set) { 5240 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5241 } 5242 /* set operators */ 5243 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5244 if (pcbddc->dbg_flag) { 5245 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5246 } 5247 } else { /* processes non partecipating to coarse solver (if any) */ 5248 coarse_mat = 0; 5249 } 5250 ierr = PetscFree(isarray);CHKERRQ(ierr); 5251 #if 0 5252 { 5253 PetscViewer viewer; 5254 char filename[256]; 5255 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5256 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5257 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5258 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5259 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5260 } 5261 #endif 5262 5263 /* Compute coarse null space (special handling by BDDC only) */ 5264 #if 0 5265 if (pcbddc->NullSpace) { 5266 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5267 } 5268 #endif 5269 /* hack */ 5270 if (pcbddc->coarse_ksp) { 5271 Vec crhs,csol; 5272 5273 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5274 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5275 if (!csol) { 5276 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5277 } 5278 if (!crhs) { 5279 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5280 } 5281 } 5282 5283 /* compute null space for coarse solver if the benign trick has been requested */ 5284 if (pcbddc->benign_null) { 5285 5286 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5287 for (i=0;i<pcbddc->benign_n;i++) { 5288 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5289 } 5290 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5291 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5292 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5293 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5294 if (coarse_mat) { 5295 Vec nullv; 5296 PetscScalar *array,*array2; 5297 PetscInt nl; 5298 5299 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5300 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5301 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5302 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5303 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5304 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5305 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5306 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5307 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5308 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5309 } 5310 } 5311 5312 if (pcbddc->coarse_ksp) { 5313 PetscBool ispreonly; 5314 5315 if (CoarseNullSpace) { 5316 PetscBool isnull; 5317 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5318 if (isnull) { 5319 if (isbddc && !pcbddc->benign_saddle_point) { 5320 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5321 } else { 5322 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5323 } 5324 } else { 5325 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5326 } 5327 } 5328 /* setup coarse ksp */ 5329 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5330 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5331 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5332 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5333 KSP check_ksp; 5334 KSPType check_ksp_type; 5335 PC check_pc; 5336 Vec check_vec,coarse_vec; 5337 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5338 PetscInt its; 5339 PetscBool compute_eigs; 5340 PetscReal *eigs_r,*eigs_c; 5341 PetscInt neigs; 5342 const char *prefix; 5343 5344 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5345 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5346 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5347 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5348 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5349 if (ispreonly) { 5350 check_ksp_type = KSPPREONLY; 5351 compute_eigs = PETSC_FALSE; 5352 } else { 5353 check_ksp_type = KSPGMRES; 5354 compute_eigs = PETSC_TRUE; 5355 } 5356 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5357 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5358 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5359 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5360 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5361 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5362 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5363 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5364 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5365 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5366 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5367 /* create random vec */ 5368 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5369 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5370 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5371 if (CoarseNullSpace) { 5372 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5373 } 5374 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5375 /* solve coarse problem */ 5376 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5377 if (CoarseNullSpace) { 5378 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5379 } 5380 /* set eigenvalue estimation if preonly has not been requested */ 5381 if (compute_eigs) { 5382 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5383 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5384 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5385 lambda_max = eigs_r[neigs-1]; 5386 lambda_min = eigs_r[0]; 5387 if (pcbddc->use_coarse_estimates) { 5388 if (lambda_max>lambda_min) { 5389 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5390 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5391 } 5392 } 5393 } 5394 5395 /* check coarse problem residual error */ 5396 if (pcbddc->dbg_flag) { 5397 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5398 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5399 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5400 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5401 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5402 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5403 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5404 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5405 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5406 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5407 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5408 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5409 if (CoarseNullSpace) { 5410 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5411 } 5412 if (compute_eigs) { 5413 PetscReal lambda_max_s,lambda_min_s; 5414 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5415 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5416 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5417 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5418 for (i=0;i<neigs;i++) { 5419 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5420 } 5421 } 5422 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5423 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5424 } 5425 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5426 if (compute_eigs) { 5427 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5428 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5429 } 5430 } 5431 } 5432 /* print additional info */ 5433 if (pcbddc->dbg_flag) { 5434 /* waits until all processes reaches this point */ 5435 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5437 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5438 } 5439 5440 /* free memory */ 5441 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5442 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5443 PetscFunctionReturn(0); 5444 } 5445 5446 #undef __FUNCT__ 5447 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5448 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5449 { 5450 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5451 PC_IS* pcis = (PC_IS*)pc->data; 5452 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5453 IS subset,subset_mult,subset_n; 5454 PetscInt local_size,coarse_size=0; 5455 PetscInt *local_primal_indices=NULL; 5456 const PetscInt *t_local_primal_indices; 5457 PetscErrorCode ierr; 5458 5459 PetscFunctionBegin; 5460 /* Compute global number of coarse dofs */ 5461 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5462 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5463 } 5464 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5465 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5466 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5467 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5468 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5469 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5470 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5471 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5472 if (local_size != pcbddc->local_primal_size) { 5473 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5474 } 5475 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5476 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5477 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5478 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5479 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5480 5481 /* check numbering */ 5482 if (pcbddc->dbg_flag) { 5483 PetscScalar coarsesum,*array,*array2; 5484 PetscInt i; 5485 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5486 5487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5490 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5491 /* counter */ 5492 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5493 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5494 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5495 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5496 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5497 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5498 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5499 for (i=0;i<pcbddc->local_primal_size;i++) { 5500 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5501 } 5502 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5503 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5504 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5505 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5506 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5507 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5508 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5509 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5510 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5511 for (i=0;i<pcis->n;i++) { 5512 if (array[i] != 0.0 && array[i] != array2[i]) { 5513 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5514 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5515 set_error = PETSC_TRUE; 5516 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5517 } 5518 } 5519 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5520 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5522 for (i=0;i<pcis->n;i++) { 5523 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5524 } 5525 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5526 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5527 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5528 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5529 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5530 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5531 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5532 PetscInt *gidxs; 5533 5534 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5535 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5536 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5539 for (i=0;i<pcbddc->local_primal_size;i++) { 5540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5541 } 5542 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5543 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5544 } 5545 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5546 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5547 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5548 } 5549 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5550 /* get back data */ 5551 *coarse_size_n = coarse_size; 5552 *local_primal_indices_n = local_primal_indices; 5553 PetscFunctionReturn(0); 5554 } 5555 5556 #undef __FUNCT__ 5557 #define __FUNCT__ "PCBDDCGlobalToLocal" 5558 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5559 { 5560 IS localis_t; 5561 PetscInt i,lsize,*idxs,n; 5562 PetscScalar *vals; 5563 PetscErrorCode ierr; 5564 5565 PetscFunctionBegin; 5566 /* get indices in local ordering exploiting local to global map */ 5567 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5568 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5569 for (i=0;i<lsize;i++) vals[i] = 1.0; 5570 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5571 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5572 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5573 if (idxs) { /* multilevel guard */ 5574 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5575 } 5576 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5577 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5578 ierr = PetscFree(vals);CHKERRQ(ierr); 5579 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5580 /* now compute set in local ordering */ 5581 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5582 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5583 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5584 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5585 for (i=0,lsize=0;i<n;i++) { 5586 if (PetscRealPart(vals[i]) > 0.5) { 5587 lsize++; 5588 } 5589 } 5590 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5591 for (i=0,lsize=0;i<n;i++) { 5592 if (PetscRealPart(vals[i]) > 0.5) { 5593 idxs[lsize++] = i; 5594 } 5595 } 5596 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5597 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5598 *localis = localis_t; 5599 PetscFunctionReturn(0); 5600 } 5601 5602 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5603 #undef __FUNCT__ 5604 #define __FUNCT__ "PCBDDCMatMult_Private" 5605 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5606 { 5607 PCBDDCChange_ctx change_ctx; 5608 PetscErrorCode ierr; 5609 5610 PetscFunctionBegin; 5611 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5612 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5613 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5614 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5615 PetscFunctionReturn(0); 5616 } 5617 5618 #undef __FUNCT__ 5619 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5620 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5621 { 5622 PCBDDCChange_ctx change_ctx; 5623 PetscErrorCode ierr; 5624 5625 PetscFunctionBegin; 5626 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5627 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5628 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5629 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5630 PetscFunctionReturn(0); 5631 } 5632 5633 #undef __FUNCT__ 5634 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5635 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5636 { 5637 PC_IS *pcis=(PC_IS*)pc->data; 5638 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5639 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5640 Mat S_j; 5641 PetscInt *used_xadj,*used_adjncy; 5642 PetscBool free_used_adj; 5643 PetscErrorCode ierr; 5644 5645 PetscFunctionBegin; 5646 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5647 free_used_adj = PETSC_FALSE; 5648 if (pcbddc->sub_schurs_layers == -1) { 5649 used_xadj = NULL; 5650 used_adjncy = NULL; 5651 } else { 5652 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5653 used_xadj = pcbddc->mat_graph->xadj; 5654 used_adjncy = pcbddc->mat_graph->adjncy; 5655 } else if (pcbddc->computed_rowadj) { 5656 used_xadj = pcbddc->mat_graph->xadj; 5657 used_adjncy = pcbddc->mat_graph->adjncy; 5658 } else { 5659 PetscBool flg_row=PETSC_FALSE; 5660 const PetscInt *xadj,*adjncy; 5661 PetscInt nvtxs; 5662 5663 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5664 if (flg_row) { 5665 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5666 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5667 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5668 free_used_adj = PETSC_TRUE; 5669 } else { 5670 pcbddc->sub_schurs_layers = -1; 5671 used_xadj = NULL; 5672 used_adjncy = NULL; 5673 } 5674 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5675 } 5676 } 5677 5678 /* setup sub_schurs data */ 5679 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5680 if (!sub_schurs->use_mumps) { 5681 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5682 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5683 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5684 } else { 5685 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5686 PetscBool isseqaij; 5687 if (!pcbddc->use_vertices && reuse_solvers) { 5688 PetscInt n_vertices; 5689 5690 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5691 reuse_solvers = (PetscBool)!n_vertices; 5692 } 5693 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5694 if (!isseqaij) { 5695 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5696 if (matis->A == pcbddc->local_mat) { 5697 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5698 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5699 } else { 5700 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5701 } 5702 } 5703 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5704 } 5705 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5706 5707 /* free adjacency */ 5708 if (free_used_adj) { 5709 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5710 } 5711 PetscFunctionReturn(0); 5712 } 5713 5714 #undef __FUNCT__ 5715 #define __FUNCT__ "PCBDDCInitSubSchurs" 5716 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5717 { 5718 PC_IS *pcis=(PC_IS*)pc->data; 5719 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5720 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5721 PCBDDCGraph graph; 5722 PetscErrorCode ierr; 5723 5724 PetscFunctionBegin; 5725 /* attach interface graph for determining subsets */ 5726 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5727 IS verticesIS,verticescomm; 5728 PetscInt vsize,*idxs; 5729 5730 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5731 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5732 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5733 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5734 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5735 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5736 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5737 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5738 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5739 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5740 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5741 /* 5742 if (pcbddc->dbg_flag) { 5743 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5744 } 5745 */ 5746 } else { 5747 graph = pcbddc->mat_graph; 5748 } 5749 5750 /* sub_schurs init */ 5751 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5752 5753 /* free graph struct */ 5754 if (pcbddc->sub_schurs_rebuild) { 5755 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5756 } 5757 PetscFunctionReturn(0); 5758 } 5759 5760 #undef __FUNCT__ 5761 #define __FUNCT__ "PCBDDCCheckOperator" 5762 PetscErrorCode PCBDDCCheckOperator(PC pc) 5763 { 5764 PC_IS *pcis=(PC_IS*)pc->data; 5765 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5766 PetscErrorCode ierr; 5767 5768 PetscFunctionBegin; 5769 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5770 IS zerodiag = NULL; 5771 Mat S_j,B0_B=NULL; 5772 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5773 PetscScalar *p0_check,*array,*array2; 5774 PetscReal norm; 5775 PetscInt i; 5776 5777 /* B0 and B0_B */ 5778 if (zerodiag) { 5779 IS dummy; 5780 5781 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5782 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5783 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5784 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5785 } 5786 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5787 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5788 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5789 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5790 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5791 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5792 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5793 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5794 /* S_j */ 5795 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5796 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5797 5798 /* mimic vector in \widetilde{W}_\Gamma */ 5799 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5800 /* continuous in primal space */ 5801 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5802 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5803 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5804 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5805 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5806 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5807 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5808 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5809 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5810 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5811 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5812 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5813 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5814 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5815 5816 /* assemble rhs for coarse problem */ 5817 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5818 /* local with Schur */ 5819 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5820 if (zerodiag) { 5821 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5822 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5823 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5824 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5825 } 5826 /* sum on primal nodes the local contributions */ 5827 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5828 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5829 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5830 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5831 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5832 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5833 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5834 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5835 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5836 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5837 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5838 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5839 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5840 /* scale primal nodes (BDDC sums contibutions) */ 5841 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5842 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5843 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5844 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5845 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5846 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5847 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5848 /* global: \widetilde{B0}_B w_\Gamma */ 5849 if (zerodiag) { 5850 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5851 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5852 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5853 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5854 } 5855 /* BDDC */ 5856 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5857 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5858 5859 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5860 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5861 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5862 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5863 for (i=0;i<pcbddc->benign_n;i++) { 5864 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5865 } 5866 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5867 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5868 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5869 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5870 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5871 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5872 } 5873 PetscFunctionReturn(0); 5874 } 5875