1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 147 /* partial clean up */ 148 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 149 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 150 if (A != B) { 151 ierr = MatDestroy(&B);CHKERRQ(ierr); 152 } 153 154 /* get back data */ 155 if (ncc) *ncc = graph->ncc; 156 if (cc) { 157 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 158 for (i=0;i<graph->ncc;i++) { 159 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 160 } 161 *cc = cc_n; 162 } 163 /* clean up graph */ 164 graph->xadj = 0; 165 graph->adjncy = 0; 166 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "PCBDDCBenignCheck" 172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 173 { 174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 175 PC_IS* pcis = (PC_IS*)(pc->data); 176 IS dirIS = NULL; 177 PetscInt i; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 182 if (zerodiag) { 183 Mat A; 184 Vec vec3_N; 185 PetscScalar *vals; 186 const PetscInt *idxs; 187 PetscInt nz; 188 189 /* p0 */ 190 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 191 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 193 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 194 for (i=0;i<nz;i++) vals[i] = 1.; 195 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 198 /* v_I */ 199 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 200 for (i=0;i<nz;i++) vals[i] = 0.; 201 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 202 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 203 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 204 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 205 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 206 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 207 if (dirIS) { 208 PetscInt n; 209 210 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 211 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 212 for (i=0;i<n;i++) vals[i] = 0.; 213 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 214 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 215 } 216 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 218 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 219 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 220 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 221 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 222 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 223 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 224 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 225 } 226 ierr = PetscFree(vals);CHKERRQ(ierr); 227 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 228 } 229 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 230 231 /* check PCBDDCBenignGetOrSetP0 */ 232 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 235 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 236 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 237 for (i=0;i<pcbddc->benign_n;i++) { 238 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 239 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 240 } 241 } 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 248 { 249 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 250 IS pressures,zerodiag,*zerodiag_subs; 251 PetscInt nz,n; 252 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 258 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 259 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 260 pcbddc->benign_original_mat = pcbddc->local_mat; 261 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 262 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 263 Checks if all the pressure dofs in each subdomain have a zero diagonal 264 If not, a change of basis on pressures is not needed 265 since the local Schur complements are SPD 266 */ 267 has_null_pressures = PETSC_TRUE; 268 have_null = PETSC_TRUE; 269 if (pcbddc->n_ISForDofsLocal) { 270 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 271 272 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 273 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 274 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 275 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 276 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 277 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 278 if (!sorted) { 279 ierr = ISSort(pressures);CHKERRQ(ierr); 280 } 281 } else { 282 pressures = NULL; 283 } 284 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 285 /* TODO: add check for shared dofs and raise error */ 286 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 287 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 288 if (!sorted) { 289 ierr = ISSort(zerodiag);CHKERRQ(ierr); 290 } 291 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 292 if (!nz) { 293 if (n) have_null = PETSC_FALSE; 294 has_null_pressures = PETSC_FALSE; 295 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 296 } 297 recompute_zerodiag = PETSC_FALSE; 298 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 299 zerodiag_subs = NULL; 300 pcbddc->benign_n = 0; 301 if (has_null_pressures) { 302 IS *subs; 303 PetscInt nsubs,i; 304 305 subs = pcbddc->local_subs; 306 nsubs = pcbddc->n_local_subs; 307 if (nsubs > 1) { 308 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 309 for (i=0;i<nsubs;i++) { 310 ISLocalToGlobalMapping l2g; 311 IS t_zerodiag_subs; 312 PetscInt nl; 313 314 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 315 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 316 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 317 if (nl) { 318 PetscBool valid = PETSC_TRUE; 319 320 if (pressures) { 321 IS t_pressure_subs; 322 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 323 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 324 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 325 } 326 if (valid) { 327 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 328 pcbddc->benign_n++; 329 } else { 330 recompute_zerodiag = PETSC_TRUE; 331 } 332 } 333 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 334 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 335 } 336 } else { /* there's just one subdomain (or zero if they have not been detected */ 337 PetscBool valid = PETSC_TRUE; 338 if (pressures) { 339 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 340 } 341 if (valid) { 342 pcbddc->benign_n = 1; 343 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 344 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 345 zerodiag_subs[0] = zerodiag; 346 } 347 } 348 } 349 350 if (!pcbddc->benign_n) { 351 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 352 recompute_zerodiag = PETSC_FALSE; 353 has_null_pressures = PETSC_FALSE; 354 have_null = PETSC_FALSE; 355 } 356 357 /* final check for null pressures */ 358 if (zerodiag && pressures) { 359 PetscInt nz,np; 360 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 361 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 362 if (nz != np) have_null = PETSC_FALSE; 363 } 364 365 if (recompute_zerodiag) { 366 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 367 if (pcbddc->benign_n == 1) { 368 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 369 zerodiag = zerodiag_subs[0]; 370 } else { 371 PetscInt i,nzn,*new_idxs; 372 373 nzn = 0; 374 for (i=0;i<pcbddc->benign_n;i++) { 375 PetscInt ns; 376 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 377 nzn += ns; 378 } 379 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 380 nzn = 0; 381 for (i=0;i<pcbddc->benign_n;i++) { 382 PetscInt ns,*idxs; 383 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 384 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 386 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 387 nzn += ns; 388 } 389 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 390 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 391 } 392 have_null = PETSC_FALSE; 393 } 394 395 if (has_null_pressures) { 396 IS zerodiagc; 397 const PetscInt *idxs,*idxsc; 398 PetscInt i,s,*nnz; 399 Mat M; 400 401 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 402 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 403 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 404 /* local change of basis for pressures */ 405 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 406 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 407 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 408 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 409 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 410 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 411 for (i=0;i<pcbddc->benign_n;i++) { 412 PetscInt nzs,j; 413 414 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 415 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 417 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 418 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 419 } 420 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 421 ierr = PetscFree(nnz);CHKERRQ(ierr); 422 /* set identity on velocities */ 423 for (i=0;i<n-nz;i++) { 424 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 425 } 426 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 427 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 429 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 430 /* set change on pressures */ 431 for (s=0;s<pcbddc->benign_n;s++) { 432 PetscScalar *array; 433 PetscInt nzs; 434 435 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 436 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 437 for (i=0;i<nzs-1;i++) { 438 PetscScalar vals[2]; 439 PetscInt cols[2]; 440 441 cols[0] = idxs[i]; 442 cols[1] = idxs[nzs-1]; 443 vals[0] = 1.; 444 vals[1] = 1.; 445 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 446 } 447 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 448 for (i=0;i<nzs-1;i++) array[i] = -1.; 449 array[nzs-1] = 1.; 450 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 451 /* store local idxs for p0 */ 452 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 453 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 454 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 455 ierr = PetscFree(array);CHKERRQ(ierr); 456 } 457 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 /* TODO: need optimization? */ 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 462 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 463 ierr = MatDestroy(&M);CHKERRQ(ierr); 464 /* store global idxs for p0 */ 465 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 466 } 467 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 468 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 469 470 /* determines if the coarse solver will be singular or not */ 471 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 472 *zerodiaglocal = zerodiag; 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 478 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 479 { 480 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 if (!pcbddc->benign_sf) { 485 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 486 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 487 } 488 if (get) { /* use SF to get values */ 489 PetscScalar *array; 490 491 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 493 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 495 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 496 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 499 } 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 505 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 506 { 507 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 508 PetscErrorCode ierr; 509 510 PetscFunctionBegin; 511 /* TODO: add error checking 512 - avoid nested pop (or push) calls. 513 - cannot push before pop. 514 - cannot call this if pcbddc->local_mat is NULL 515 */ 516 if (!pcbddc->benign_n) { 517 PetscFunctionReturn(0); 518 } 519 if (pop) { 520 IS is_p0; 521 MatReuse reuse; 522 523 /* extract B_0 */ 524 reuse = MAT_INITIAL_MATRIX; 525 if (pcbddc->benign_B0) { 526 reuse = MAT_REUSE_MATRIX; 527 } 528 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 529 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 530 /* remove rows and cols from local problem */ 531 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 532 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 533 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 534 } else { /* push */ 535 PetscInt i; 536 537 for (i=0;i<pcbddc->benign_n;i++) { 538 PetscScalar *B0_vals; 539 PetscInt *B0_cols,B0_ncol; 540 541 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 542 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 544 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 545 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 546 } 547 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "PCBDDCAdaptiveSelection" 555 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 556 { 557 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 558 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 559 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 560 PetscBLASInt *B_iwork,*B_ifail; 561 PetscScalar *work,lwork; 562 PetscScalar *St,*S,*eigv; 563 PetscScalar *Sarray,*Starray; 564 PetscReal *eigs,thresh; 565 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 566 PetscBool allocated_S_St; 567 #if defined(PETSC_USE_COMPLEX) 568 PetscReal *rwork; 569 #endif 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 if (!sub_schurs->use_mumps) { 574 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 575 } 576 577 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 578 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 579 } 580 581 if (pcbddc->dbg_flag) { 582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 585 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 590 } 591 592 /* max size of subsets */ 593 mss = 0; 594 for (i=0;i<sub_schurs->n_subs;i++) { 595 PetscInt subset_size; 596 597 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 598 mss = PetscMax(mss,subset_size); 599 } 600 601 /* min/max and threshold */ 602 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 603 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 604 nmax = PetscMax(nmin,nmax); 605 allocated_S_St = PETSC_FALSE; 606 if (nmin) { 607 allocated_S_St = PETSC_TRUE; 608 } 609 610 /* allocate lapack workspace */ 611 cum = cum2 = 0; 612 maxneigs = 0; 613 for (i=0;i<sub_schurs->n_subs;i++) { 614 PetscInt n,subset_size; 615 616 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 617 n = PetscMin(subset_size,nmax); 618 cum += subset_size; 619 cum2 += subset_size*n; 620 maxneigs = PetscMax(maxneigs,n); 621 } 622 if (mss) { 623 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 624 PetscBLASInt B_itype = 1; 625 PetscBLASInt B_N = mss; 626 PetscReal zero = 0.0; 627 PetscReal eps = 0.0; /* dlamch? */ 628 629 B_lwork = -1; 630 S = NULL; 631 St = NULL; 632 eigs = NULL; 633 eigv = NULL; 634 B_iwork = NULL; 635 B_ifail = NULL; 636 #if defined(PETSC_USE_COMPLEX) 637 rwork = NULL; 638 #endif 639 thresh = 1.0; 640 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 641 #if defined(PETSC_USE_COMPLEX) 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 643 #else 644 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 645 #endif 646 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 647 ierr = PetscFPTrapPop();CHKERRQ(ierr); 648 } else { 649 /* TODO */ 650 } 651 } else { 652 lwork = 0; 653 } 654 655 nv = 0; 656 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 657 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 658 } 659 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 660 if (allocated_S_St) { 661 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 662 } 663 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 664 #if defined(PETSC_USE_COMPLEX) 665 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 666 #endif 667 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 668 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 669 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 670 nv+cum,&pcbddc->adaptive_constraints_idxs, 671 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 672 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 673 674 maxneigs = 0; 675 cum = cum2 = cumarray = 0; 676 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 677 pcbddc->adaptive_constraints_data_ptr[0] = 0; 678 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 679 const PetscInt *idxs; 680 681 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 682 for (cum=0;cum<nv;cum++) { 683 pcbddc->adaptive_constraints_n[cum] = 1; 684 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 685 pcbddc->adaptive_constraints_data[cum] = 1.0; 686 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 687 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 688 } 689 cum2 = cum; 690 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 691 } 692 693 if (mss) { /* multilevel */ 694 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 696 } 697 698 for (i=0;i<sub_schurs->n_subs;i++) { 699 700 const PetscInt *idxs; 701 PetscReal infty = PETSC_MAX_REAL; 702 PetscInt j,subset_size,eigs_start = 0; 703 PetscBLASInt B_N; 704 PetscBool same_data = PETSC_FALSE; 705 706 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 707 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 708 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 709 if (sub_schurs->is_hermitian) { 710 PetscInt j,k; 711 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 712 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 713 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 714 } 715 for (j=0;j<subset_size;j++) { 716 for (k=j;k<subset_size;k++) { 717 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 718 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 719 } 720 } 721 } else { 722 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 723 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 724 } 725 } else { 726 S = Sarray + cumarray; 727 St = Starray + cumarray; 728 } 729 730 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 731 /* see if we can save some work */ 732 if (sub_schurs->n_subs == 1) { 733 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 734 } 735 736 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 737 B_neigs = 0; 738 } else { 739 /* Threshold: this is an heuristic for edges */ 740 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 741 if (nmin) { 742 Mat SM,StM; 743 PetscInt j,k,nccs,nccst; 744 745 for (j=0;j<subset_size;j++) { 746 for (k=j;k<subset_size;k++) { 747 S [k*subset_size+j] = S [j*subset_size+k]; 748 St[k*subset_size+j] = St[j*subset_size+k]; 749 } 750 } 751 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr); 752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr); 753 ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr); 754 ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr); 755 if (nccs != 1 || nccst != 1) { 756 PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size); 757 } 758 ierr = MatDestroy(&SM);CHKERRQ(ierr); 759 ierr = MatDestroy(&StM);CHKERRQ(ierr); 760 } 761 762 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 763 PetscBLASInt B_itype = 1; 764 PetscBLASInt B_IL, B_IU; 765 PetscReal eps = -1.0; /* dlamch? */ 766 PetscInt nmin_s; 767 768 if (pcbddc->dbg_flag) { 769 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 770 } 771 772 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 773 if (thresh > 1.+PETSC_SMALL) { 774 775 /* ask for eigenvalues larger than thresh */ 776 #if defined(PETSC_USE_COMPLEX) 777 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 778 #else 779 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 780 #endif 781 } else { 782 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 783 B_IL = 1; 784 #if defined(PETSC_USE_COMPLEX) 785 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 786 #else 787 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 788 #endif 789 } 790 ierr = PetscFPTrapPop();CHKERRQ(ierr); 791 if (B_ierr) { 792 if (B_ierr < 0 ) { 793 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 794 } else if (B_ierr <= B_N) { 795 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 796 } else { 797 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 798 } 799 } 800 801 if (B_neigs > nmax) { 802 if (pcbddc->dbg_flag) { 803 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 804 } 805 eigs_start = B_neigs -nmax; 806 B_neigs = nmax; 807 } 808 809 nmin_s = PetscMin(nmin,B_N); 810 if (B_neigs < nmin_s) { 811 PetscBLASInt B_neigs2; 812 813 B_IU = B_N - B_neigs; 814 B_IL = B_N - nmin_s + 1; 815 if (pcbddc->dbg_flag) { 816 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 817 } 818 if (sub_schurs->is_hermitian) { 819 PetscInt j; 820 for (j=0;j<subset_size;j++) { 821 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 822 } 823 for (j=0;j<subset_size;j++) { 824 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 825 } 826 } else { 827 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 828 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 829 } 830 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 831 #if defined(PETSC_USE_COMPLEX) 832 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 833 #else 834 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 835 #endif 836 ierr = PetscFPTrapPop();CHKERRQ(ierr); 837 B_neigs += B_neigs2; 838 } 839 if (B_ierr) { 840 if (B_ierr < 0 ) { 841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 842 } else if (B_ierr <= B_N) { 843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 844 } else { 845 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 846 } 847 } 848 if (pcbddc->dbg_flag) { 849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 850 for (j=0;j<B_neigs;j++) { 851 if (eigs[j] == 0.0) { 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 853 } else { 854 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 855 } 856 } 857 } 858 } else { 859 /* TODO */ 860 } 861 } 862 maxneigs = PetscMax(B_neigs,maxneigs); 863 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 864 if (B_neigs) { 865 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 866 867 if (pcbddc->dbg_flag > 1) { 868 PetscInt ii; 869 for (ii=0;ii<B_neigs;ii++) { 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 871 for (j=0;j<B_N;j++) { 872 #if defined(PETSC_USE_COMPLEX) 873 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 874 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 876 #else 877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 878 #endif 879 } 880 } 881 } 882 #if 0 883 for (j=0;j<B_neigs;j++) { 884 PetscBLASInt Blas_N,Blas_one = 1.0; 885 PetscScalar norm; 886 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 887 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 888 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 889 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 890 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 891 } else { 892 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 893 } 894 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 895 } 896 #endif 897 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 898 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 899 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 900 cum++; 901 } 902 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 903 /* shift for next computation */ 904 cumarray += subset_size*subset_size; 905 } 906 if (pcbddc->dbg_flag) { 907 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 908 } 909 910 if (mss) { 911 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 912 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 913 /* destroy matrices (junk) */ 914 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 915 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 916 } 917 if (allocated_S_St) { 918 ierr = PetscFree2(S,St);CHKERRQ(ierr); 919 } 920 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 921 #if defined(PETSC_USE_COMPLEX) 922 ierr = PetscFree(rwork);CHKERRQ(ierr); 923 #endif 924 if (pcbddc->dbg_flag) { 925 PetscInt maxneigs_r; 926 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 927 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 928 } 929 PetscFunctionReturn(0); 930 } 931 932 #undef __FUNCT__ 933 #define __FUNCT__ "PCBDDCSetUpSolvers" 934 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 935 { 936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 937 PetscScalar *coarse_submat_vals; 938 PetscErrorCode ierr; 939 940 PetscFunctionBegin; 941 /* Setup local scatters R_to_B and (optionally) R_to_D */ 942 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 943 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 944 945 /* Setup local neumann solver ksp_R */ 946 /* PCBDDCSetUpLocalScatters should be called first! */ 947 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 948 949 /* Change global null space passed in by the user if change of basis has been requested */ 950 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 951 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 952 } 953 954 /* 955 Setup local correction and local part of coarse basis. 956 Gives back the dense local part of the coarse matrix in column major ordering 957 */ 958 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 959 960 /* Compute total number of coarse nodes and setup coarse solver */ 961 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 962 963 /* free */ 964 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 #undef __FUNCT__ 969 #define __FUNCT__ "PCBDDCResetCustomization" 970 PetscErrorCode PCBDDCResetCustomization(PC pc) 971 { 972 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 973 PetscErrorCode ierr; 974 975 PetscFunctionBegin; 976 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 977 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 978 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 979 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 980 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 981 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 982 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 983 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 984 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 985 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 986 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 #undef __FUNCT__ 991 #define __FUNCT__ "PCBDDCResetTopography" 992 PetscErrorCode PCBDDCResetTopography(PC pc) 993 { 994 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 995 PetscInt i; 996 PetscErrorCode ierr; 997 998 PetscFunctionBegin; 999 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 1000 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1001 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1002 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1003 for (i=0;i<pcbddc->n_local_subs;i++) { 1004 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1005 } 1006 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1007 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "PCBDDCResetSolvers" 1013 PetscErrorCode PCBDDCResetSolvers(PC pc) 1014 { 1015 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1016 PetscScalar *array; 1017 PetscErrorCode ierr; 1018 1019 PetscFunctionBegin; 1020 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1021 if (pcbddc->coarse_phi_B) { 1022 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1023 ierr = PetscFree(array);CHKERRQ(ierr); 1024 } 1025 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1028 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1029 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1030 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1031 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1032 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1033 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1034 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1035 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1036 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1037 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1038 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1039 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1040 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1041 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1042 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1043 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1044 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1045 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1046 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1047 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1048 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1049 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1050 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1051 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1052 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1053 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1054 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1060 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1061 { 1062 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1063 PC_IS *pcis = (PC_IS*)pc->data; 1064 VecType impVecType; 1065 PetscInt n_constraints,n_R,old_size; 1066 PetscErrorCode ierr; 1067 1068 PetscFunctionBegin; 1069 if (!pcbddc->ConstraintMatrix) { 1070 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1071 } 1072 /* get sizes */ 1073 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1074 n_R = pcis->n - pcbddc->n_vertices; 1075 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1076 /* local work vectors (try to avoid unneeded work)*/ 1077 /* R nodes */ 1078 old_size = -1; 1079 if (pcbddc->vec1_R) { 1080 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1081 } 1082 if (n_R != old_size) { 1083 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1084 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1085 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1086 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1087 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1088 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1089 } 1090 /* local primal dofs */ 1091 old_size = -1; 1092 if (pcbddc->vec1_P) { 1093 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1094 } 1095 if (pcbddc->local_primal_size != old_size) { 1096 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1097 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1098 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1099 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1100 } 1101 /* local explicit constraints */ 1102 old_size = -1; 1103 if (pcbddc->vec1_C) { 1104 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1105 } 1106 if (n_constraints && n_constraints != old_size) { 1107 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1108 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1109 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1110 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "PCBDDCSetUpCorrection" 1117 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1118 { 1119 PetscErrorCode ierr; 1120 /* pointers to pcis and pcbddc */ 1121 PC_IS* pcis = (PC_IS*)pc->data; 1122 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1123 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1124 /* submatrices of local problem */ 1125 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1126 /* submatrices of benign trick */ 1127 Mat B0_V = NULL; 1128 /* submatrices of local coarse problem */ 1129 Mat S_VV,S_CV,S_VC,S_CC; 1130 /* working matrices */ 1131 Mat C_CR; 1132 /* additional working stuff */ 1133 PC pc_R; 1134 Mat F; 1135 PetscBool isLU,isCHOL,isILU; 1136 1137 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1138 PetscScalar *work; 1139 PetscInt *idx_V_B; 1140 PetscInt n,n_vertices,n_constraints,*p0_lidx_I; 1141 PetscInt i,n_R,n_D,n_B; 1142 PetscBool unsymmetric_check; 1143 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 1144 MatType impMatType; 1145 /* some shortcuts to scalars */ 1146 PetscScalar one=1.0,m_one=-1.0; 1147 1148 PetscFunctionBegin; 1149 n_vertices = pcbddc->n_vertices; 1150 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1151 /* Set Non-overlapping dimensions */ 1152 n_B = pcis->n_B; 1153 n_D = pcis->n - n_B; 1154 n_R = pcis->n - n_vertices; 1155 1156 /* Set types for local objects needed by BDDC precondtioner */ 1157 impMatType = MATSEQDENSE; 1158 1159 /* vertices in boundary numbering */ 1160 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1161 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1162 if (i != n_vertices) { 1163 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1164 } 1165 1166 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1167 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1168 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1169 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1170 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1171 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1172 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1173 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1174 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1175 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1176 1177 unsymmetric_check = PETSC_FALSE; 1178 /* allocate workspace */ 1179 n = 0; 1180 if (n_constraints) { 1181 n += n_R*n_constraints; 1182 } 1183 if (n_vertices) { 1184 n = PetscMax(2*n_R*n_vertices,n); 1185 n = PetscMax((n_R+n_B)*n_vertices,n); 1186 } 1187 if (!pcbddc->symmetric_primal) { 1188 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 1189 unsymmetric_check = PETSC_TRUE; 1190 } 1191 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1192 1193 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1194 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1195 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1196 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1197 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1198 if (isLU || isILU || isCHOL) { 1199 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1200 } else if (sub_schurs->reuse_mumps) { 1201 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1202 MatFactorType type; 1203 1204 F = reuse_mumps->F; 1205 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1206 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1207 } else { 1208 F = NULL; 1209 } 1210 1211 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1212 if (n_constraints) { 1213 Mat M1,M2,M3; 1214 Mat auxmat; 1215 IS is_aux; 1216 PetscScalar *array,*array2; 1217 1218 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1219 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1220 1221 /* Extract constraints on R nodes: C_{CR} */ 1222 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1223 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1224 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1225 1226 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1227 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1228 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1229 for (i=0;i<n_constraints;i++) { 1230 const PetscScalar *row_cmat_values; 1231 const PetscInt *row_cmat_indices; 1232 PetscInt size_of_constraint,j; 1233 1234 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1235 for (j=0;j<size_of_constraint;j++) { 1236 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 1237 } 1238 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1239 } 1240 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1241 if (F) { 1242 Mat B; 1243 1244 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1245 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1246 ierr = MatDestroy(&B);CHKERRQ(ierr); 1247 } else { 1248 PetscScalar *marr; 1249 1250 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1251 for (i=0;i<n_constraints;i++) { 1252 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1253 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 1254 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1255 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1256 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1257 } 1258 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1259 } 1260 if (!pcbddc->switch_static) { 1261 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1262 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1263 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1264 for (i=0;i<n_constraints;i++) { 1265 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 1266 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1267 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1270 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1271 } 1272 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1273 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1274 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1275 } else { 1276 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1277 pcbddc->local_auxmat2 = local_auxmat2_R; 1278 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1279 } 1280 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1281 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1282 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1283 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1284 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1285 if (isCHOL) { 1286 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1287 } else { 1288 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1289 } 1290 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1291 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1292 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1293 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1294 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1295 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1296 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1297 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1298 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1299 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1300 } 1301 /* Get submatrices from subdomain matrix */ 1302 if (pcbddc->benign_n) { 1303 IS dummy; 1304 Mat B0_R; 1305 PetscReal norm; 1306 1307 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1308 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1309 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1310 if (norm > PETSC_SMALL) { 1311 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1312 } 1313 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1314 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1315 } 1316 1317 if (n_vertices) { 1318 IS is_aux; 1319 1320 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1321 IS tis; 1322 1323 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1324 ierr = ISSort(tis);CHKERRQ(ierr); 1325 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1326 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1327 } else { 1328 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1329 } 1330 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1331 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1332 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1333 if (pcbddc->benign_n) { 1334 IS dummy; 1335 1336 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1337 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1338 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1339 } 1340 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1341 } 1342 1343 /* Matrix of coarse basis functions (local) */ 1344 if (pcbddc->coarse_phi_B) { 1345 PetscInt on_B,on_primal,on_D=n_D; 1346 if (pcbddc->coarse_phi_D) { 1347 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1348 } 1349 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1350 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1351 PetscScalar *marray; 1352 1353 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1354 ierr = PetscFree(marray);CHKERRQ(ierr); 1355 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1356 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1357 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1358 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1359 } 1360 } 1361 1362 if (!pcbddc->coarse_phi_B) { 1363 PetscScalar *marray; 1364 1365 n = n_B*pcbddc->local_primal_size; 1366 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1367 n += n_D*pcbddc->local_primal_size; 1368 } 1369 if (!pcbddc->symmetric_primal) { 1370 n *= 2; 1371 } 1372 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1373 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1374 n = n_B*pcbddc->local_primal_size; 1375 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1376 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1377 n += n_D*pcbddc->local_primal_size; 1378 } 1379 if (!pcbddc->symmetric_primal) { 1380 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1381 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1382 n = n_B*pcbddc->local_primal_size; 1383 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1384 } 1385 } else { 1386 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1387 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1388 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1389 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1390 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1391 } 1392 } 1393 } 1394 1395 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1396 p0_lidx_I = NULL; 1397 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1398 const PetscInt *idxs; 1399 1400 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1401 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1402 for (i=0;i<pcbddc->benign_n;i++) { 1403 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1404 } 1405 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1406 } 1407 1408 /* vertices */ 1409 if (n_vertices) { 1410 1411 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1412 1413 if (n_R) { 1414 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1415 PetscBLASInt B_N,B_one = 1; 1416 PetscScalar *x,*y; 1417 PetscBool isseqaij; 1418 1419 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1420 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1421 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1422 if (F) { /* TODO could be optimized for symmetric problems */ 1423 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1424 } else { 1425 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1426 for (i=0;i<n_vertices;i++) { 1427 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1428 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1429 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1430 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1431 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1432 } 1433 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1434 } 1435 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1436 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1437 if (n_constraints) { 1438 Mat B; 1439 1440 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1441 for (i=0;i<n_vertices;i++) { 1442 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1443 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1444 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1445 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1446 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1447 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1448 } 1449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1450 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1451 ierr = MatDestroy(&B);CHKERRQ(ierr); 1452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1453 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1454 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1455 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1456 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1457 ierr = MatDestroy(&B);CHKERRQ(ierr); 1458 } 1459 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1460 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1461 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1462 } 1463 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1464 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1465 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1466 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1468 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1469 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1470 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1471 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1472 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1473 } else { 1474 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1475 } 1476 for (i=0;i<pcbddc->benign_n;i++) { 1477 const PetscScalar *vals; 1478 const PetscInt *idxs; 1479 PetscInt n,j,primal_idx; 1480 1481 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1482 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1483 for (j=0;j<n;j++) { 1484 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1485 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1486 } 1487 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1488 } 1489 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1490 1491 /* coarse basis functions */ 1492 for (i=0;i<n_vertices;i++) { 1493 PetscScalar *y; 1494 1495 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1496 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1497 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1498 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 y[n_B*i+idx_V_B[i]] = 1.0; 1501 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1502 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1503 1504 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1505 PetscInt j; 1506 1507 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1508 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1509 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1512 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1513 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1514 } 1515 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1516 } 1517 /* if n_R == 0 the object is not destroyed */ 1518 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1519 } 1520 1521 if (n_constraints) { 1522 Mat B; 1523 1524 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1525 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1526 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1527 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1528 if (n_vertices) { 1529 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1530 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1531 } else { 1532 Mat S_VCt; 1533 1534 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1535 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1536 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1537 } 1538 } 1539 ierr = MatDestroy(&B);CHKERRQ(ierr); 1540 /* coarse basis functions */ 1541 for (i=0;i<n_constraints;i++) { 1542 PetscScalar *y; 1543 1544 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1545 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1546 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1547 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1550 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1551 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1552 PetscInt j; 1553 1554 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1555 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1556 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1557 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1558 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1559 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1560 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1561 } 1562 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1563 } 1564 } 1565 if (n_constraints) { 1566 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1567 } 1568 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1569 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1570 1571 /* compute other basis functions for non-symmetric problems */ 1572 if (!pcbddc->symmetric_primal) { 1573 1574 if (n_constraints) { 1575 Mat S_CCT,B_C; 1576 1577 /* this is a lazy thing */ 1578 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1579 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1580 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1581 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1582 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1583 if (n_vertices) { 1584 Mat B_V,S_VCT; 1585 1586 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1587 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1588 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1589 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1590 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1591 } 1592 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1593 } else { /* if there are no constraints, reset work */ 1594 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1595 } 1596 if (n_vertices && n_R) { 1597 Mat A_VRT; 1598 PetscScalar *marray; 1599 PetscBLASInt B_N,B_one = 1; 1600 1601 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1602 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1603 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1604 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1605 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1606 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1607 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1608 } 1609 1610 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1611 for (i=0;i<pcbddc->local_primal_size;i++) { 1612 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1613 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1614 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1615 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1616 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1617 } 1618 } else { 1619 for (i=0;i<pcbddc->local_primal_size;i++) { 1620 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1621 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1622 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1623 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1625 } 1626 } 1627 /* coarse basis functions */ 1628 for (i=0;i<pcbddc->local_primal_size;i++) { 1629 PetscScalar *y; 1630 1631 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1632 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1633 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1634 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 if (i<n_vertices) { 1637 y[n_B*i+idx_V_B[i]] = 1.0; 1638 } 1639 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1640 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1641 1642 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1643 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1644 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1645 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1647 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1648 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1649 } 1650 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1651 } 1652 } 1653 /* free memory */ 1654 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1655 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1656 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1657 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1658 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1659 ierr = PetscFree(work);CHKERRQ(ierr); 1660 if (n_vertices) { 1661 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1662 } 1663 if (n_constraints) { 1664 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1665 } 1666 /* Checking coarse_sub_mat and coarse basis functios */ 1667 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1668 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1669 if (pcbddc->dbg_flag) { 1670 Mat coarse_sub_mat; 1671 Mat AUXMAT,TM1,TM2,TM3,TM4; 1672 Mat coarse_phi_D,coarse_phi_B; 1673 Mat coarse_psi_D,coarse_psi_B; 1674 Mat A_II,A_BB,A_IB,A_BI; 1675 Mat C_B,CPHI; 1676 IS is_dummy; 1677 Vec mones; 1678 MatType checkmattype=MATSEQAIJ; 1679 PetscReal real_value; 1680 1681 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1682 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1683 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1684 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1685 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1686 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1687 if (unsymmetric_check) { 1688 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1689 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1690 } 1691 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1692 1693 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1694 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1695 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1696 if (unsymmetric_check) { 1697 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1698 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1699 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1700 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1701 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1702 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1703 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1704 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1705 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1706 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1707 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1708 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1709 } else { 1710 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1711 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1712 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1713 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1714 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1715 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1716 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1717 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1718 } 1719 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1720 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1721 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1722 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1723 if (pcbddc->benign_n) { 1724 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1725 PetscScalar *data,*data2; 1726 PetscInt j; 1727 1728 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1729 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1730 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1731 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1732 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1733 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1734 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1735 for (j=0;j<pcbddc->benign_n;j++) { 1736 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1737 for (i=0;i<pcbddc->local_primal_size;i++) { 1738 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1739 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1740 } 1741 } 1742 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1743 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1744 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1745 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1746 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1747 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1748 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1749 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1750 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1751 } 1752 #if 0 1753 { 1754 PetscViewer viewer; 1755 char filename[256]; 1756 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1757 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1758 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1759 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1760 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1761 } 1762 #endif 1763 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1764 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1765 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1766 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1767 1768 /* check constraints */ 1769 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1770 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1771 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1772 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1773 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1774 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1775 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1776 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1778 if (unsymmetric_check) { 1779 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1780 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1781 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1782 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1783 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1784 } 1785 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1786 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1787 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1788 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1789 } 1790 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1791 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1792 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1793 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1794 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1795 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1796 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1797 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1798 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1799 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1800 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1801 if (unsymmetric_check) { 1802 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1803 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1804 } 1805 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1806 } 1807 /* get back data */ 1808 *coarse_submat_vals_n = coarse_submat_vals; 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1814 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1815 { 1816 Mat *work_mat; 1817 IS isrow_s,iscol_s; 1818 PetscBool rsorted,csorted; 1819 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1820 PetscErrorCode ierr; 1821 1822 PetscFunctionBegin; 1823 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1824 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1825 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1826 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1827 1828 if (!rsorted) { 1829 const PetscInt *idxs; 1830 PetscInt *idxs_sorted,i; 1831 1832 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1833 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1834 for (i=0;i<rsize;i++) { 1835 idxs_perm_r[i] = i; 1836 } 1837 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1838 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1839 for (i=0;i<rsize;i++) { 1840 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1841 } 1842 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1843 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1844 } else { 1845 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1846 isrow_s = isrow; 1847 } 1848 1849 if (!csorted) { 1850 if (isrow == iscol) { 1851 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1852 iscol_s = isrow_s; 1853 } else { 1854 const PetscInt *idxs; 1855 PetscInt *idxs_sorted,i; 1856 1857 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1858 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1859 for (i=0;i<csize;i++) { 1860 idxs_perm_c[i] = i; 1861 } 1862 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1863 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1864 for (i=0;i<csize;i++) { 1865 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1866 } 1867 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1868 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1869 } 1870 } else { 1871 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1872 iscol_s = iscol; 1873 } 1874 1875 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1876 1877 if (!rsorted || !csorted) { 1878 Mat new_mat; 1879 IS is_perm_r,is_perm_c; 1880 1881 if (!rsorted) { 1882 PetscInt *idxs_r,i; 1883 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1884 for (i=0;i<rsize;i++) { 1885 idxs_r[idxs_perm_r[i]] = i; 1886 } 1887 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1888 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1889 } else { 1890 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1891 } 1892 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1893 1894 if (!csorted) { 1895 if (isrow_s == iscol_s) { 1896 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1897 is_perm_c = is_perm_r; 1898 } else { 1899 PetscInt *idxs_c,i; 1900 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1901 for (i=0;i<csize;i++) { 1902 idxs_c[idxs_perm_c[i]] = i; 1903 } 1904 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1905 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1906 } 1907 } else { 1908 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1909 } 1910 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1911 1912 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1913 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1914 work_mat[0] = new_mat; 1915 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1916 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1917 } 1918 1919 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1920 *B = work_mat[0]; 1921 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1922 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1923 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1924 PetscFunctionReturn(0); 1925 } 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1929 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1930 { 1931 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1932 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1933 Mat new_mat; 1934 IS is_local,is_global; 1935 PetscInt local_size; 1936 PetscBool isseqaij; 1937 PetscErrorCode ierr; 1938 1939 PetscFunctionBegin; 1940 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1941 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1942 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1943 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1944 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1945 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1946 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1947 1948 /* check */ 1949 if (pcbddc->dbg_flag) { 1950 Vec x,x_change; 1951 PetscReal error; 1952 1953 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1954 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1955 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1956 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1957 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1958 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1959 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1960 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1961 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1962 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1963 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1964 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1965 ierr = VecDestroy(&x);CHKERRQ(ierr); 1966 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1967 } 1968 1969 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1970 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1971 if (isseqaij) { 1972 Mat M; 1973 1974 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1975 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1976 ierr = MatDestroy(&M);CHKERRQ(ierr); 1977 } else { 1978 Mat work_mat,M; 1979 1980 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1981 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1982 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1983 ierr = MatDestroy(&M);CHKERRQ(ierr); 1984 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1985 } 1986 if (matis->A->symmetric_set) { 1987 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1988 #if !defined(PETSC_USE_COMPLEX) 1989 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1990 #endif 1991 } 1992 /* 1993 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1994 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1995 */ 1996 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1997 PetscFunctionReturn(0); 1998 } 1999 2000 #undef __FUNCT__ 2001 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2002 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2003 { 2004 PC_IS* pcis = (PC_IS*)(pc->data); 2005 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2006 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2007 PetscInt *idx_R_local=NULL; 2008 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2009 PetscInt vbs,bs; 2010 PetscBT bitmask=NULL; 2011 PetscErrorCode ierr; 2012 2013 PetscFunctionBegin; 2014 /* 2015 No need to setup local scatters if 2016 - primal space is unchanged 2017 AND 2018 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2019 AND 2020 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2021 */ 2022 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2023 PetscFunctionReturn(0); 2024 } 2025 /* destroy old objects */ 2026 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2027 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2028 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2029 /* Set Non-overlapping dimensions */ 2030 n_B = pcis->n_B; 2031 n_D = pcis->n - n_B; 2032 n_vertices = pcbddc->n_vertices; 2033 2034 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2035 2036 /* create auxiliary bitmask and allocate workspace */ 2037 if (!sub_schurs->reuse_mumps) { 2038 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2039 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2040 for (i=0;i<n_vertices;i++) { 2041 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2042 } 2043 2044 for (i=0, n_R=0; i<pcis->n; i++) { 2045 if (!PetscBTLookup(bitmask,i)) { 2046 idx_R_local[n_R++] = i; 2047 } 2048 } 2049 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2050 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2051 2052 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2053 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2054 } 2055 2056 /* Block code */ 2057 vbs = 1; 2058 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2059 if (bs>1 && !(n_vertices%bs)) { 2060 PetscBool is_blocked = PETSC_TRUE; 2061 PetscInt *vary; 2062 if (!sub_schurs->reuse_mumps) { 2063 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2064 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2065 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2066 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2067 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2068 for (i=0; i<pcis->n/bs; i++) { 2069 if (vary[i]!=0 && vary[i]!=bs) { 2070 is_blocked = PETSC_FALSE; 2071 break; 2072 } 2073 } 2074 ierr = PetscFree(vary);CHKERRQ(ierr); 2075 } else { 2076 /* Verify directly the R set */ 2077 for (i=0; i<n_R/bs; i++) { 2078 PetscInt j,node=idx_R_local[bs*i]; 2079 for (j=1; j<bs; j++) { 2080 if (node != idx_R_local[bs*i+j]-j) { 2081 is_blocked = PETSC_FALSE; 2082 break; 2083 } 2084 } 2085 } 2086 } 2087 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2088 vbs = bs; 2089 for (i=0;i<n_R/vbs;i++) { 2090 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2091 } 2092 } 2093 } 2094 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2095 if (sub_schurs->reuse_mumps) { 2096 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2097 2098 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2099 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2100 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2101 reuse_mumps->is_R = pcbddc->is_R_local; 2102 } else { 2103 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2104 } 2105 2106 /* print some info if requested */ 2107 if (pcbddc->dbg_flag) { 2108 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2109 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2110 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2111 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2114 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2115 } 2116 2117 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2118 if (!sub_schurs->reuse_mumps) { 2119 IS is_aux1,is_aux2; 2120 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2121 2122 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2123 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2124 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2125 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2126 for (i=0; i<n_D; i++) { 2127 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2128 } 2129 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2130 for (i=0, j=0; i<n_R; i++) { 2131 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2132 aux_array1[j++] = i; 2133 } 2134 } 2135 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2136 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2137 for (i=0, j=0; i<n_B; i++) { 2138 if (!PetscBTLookup(bitmask,is_indices[i])) { 2139 aux_array2[j++] = i; 2140 } 2141 } 2142 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2143 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2144 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2145 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2146 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2147 2148 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2149 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2150 for (i=0, j=0; i<n_R; i++) { 2151 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2152 aux_array1[j++] = i; 2153 } 2154 } 2155 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2156 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2157 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2158 } 2159 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2160 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2161 } else { 2162 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2163 IS tis; 2164 PetscInt schur_size; 2165 2166 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2167 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2168 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2169 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2170 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2171 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2172 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2173 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2174 } 2175 } 2176 PetscFunctionReturn(0); 2177 } 2178 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2182 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2183 { 2184 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2185 PC_IS *pcis = (PC_IS*)pc->data; 2186 PC pc_temp; 2187 Mat A_RR; 2188 MatReuse reuse; 2189 PetscScalar m_one = -1.0; 2190 PetscReal value; 2191 PetscInt n_D,n_R; 2192 PetscBool use_exact,use_exact_reduced,issbaij; 2193 PetscErrorCode ierr; 2194 /* prefixes stuff */ 2195 char dir_prefix[256],neu_prefix[256],str_level[16]; 2196 size_t len; 2197 2198 PetscFunctionBegin; 2199 2200 /* compute prefixes */ 2201 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2202 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2203 if (!pcbddc->current_level) { 2204 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2205 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2206 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2207 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2208 } else { 2209 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2210 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2211 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2212 len -= 15; /* remove "pc_bddc_coarse_" */ 2213 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2214 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2215 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2216 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2217 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2218 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2219 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2220 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2221 } 2222 2223 /* DIRICHLET PROBLEM */ 2224 if (dirichlet) { 2225 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2226 if (pcbddc->local_mat->symmetric_set) { 2227 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2228 } 2229 /* Matrix for Dirichlet problem is pcis->A_II */ 2230 n_D = pcis->n - pcis->n_B; 2231 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2232 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2233 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2234 /* default */ 2235 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2236 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2237 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2238 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2239 if (issbaij) { 2240 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2241 } else { 2242 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2243 } 2244 /* Allow user's customization */ 2245 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2246 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2247 } 2248 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2249 if (sub_schurs->reuse_mumps) { 2250 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2251 2252 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2253 } 2254 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2255 if (!n_D) { 2256 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2257 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2258 } 2259 /* Set Up KSP for Dirichlet problem of BDDC */ 2260 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2261 /* set ksp_D into pcis data */ 2262 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2263 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2264 pcis->ksp_D = pcbddc->ksp_D; 2265 } 2266 2267 /* NEUMANN PROBLEM */ 2268 A_RR = 0; 2269 if (neumann) { 2270 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2271 PetscInt ibs,mbs; 2272 PetscBool issbaij; 2273 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2274 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2275 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2276 if (pcbddc->ksp_R) { /* already created ksp */ 2277 PetscInt nn_R; 2278 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2279 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2280 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2281 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2282 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2283 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2284 reuse = MAT_INITIAL_MATRIX; 2285 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2286 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2287 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2288 reuse = MAT_INITIAL_MATRIX; 2289 } else { /* safe to reuse the matrix */ 2290 reuse = MAT_REUSE_MATRIX; 2291 } 2292 } 2293 /* last check */ 2294 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2295 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2296 reuse = MAT_INITIAL_MATRIX; 2297 } 2298 } else { /* first time, so we need to create the matrix */ 2299 reuse = MAT_INITIAL_MATRIX; 2300 } 2301 /* extract A_RR */ 2302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2304 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2305 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2306 if (matis->A == pcbddc->local_mat) { 2307 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2308 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2309 } else { 2310 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2311 } 2312 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2313 if (matis->A == pcbddc->local_mat) { 2314 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2315 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2316 } else { 2317 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2318 } 2319 } 2320 if (!sub_schurs->reuse_mumps) { 2321 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2322 if (pcbddc->local_mat->symmetric_set) { 2323 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2324 } 2325 } else { 2326 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2327 2328 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2329 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2330 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2331 } 2332 if (!pcbddc->ksp_R) { /* create object if not present */ 2333 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2334 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2335 /* default */ 2336 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2337 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2338 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2339 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2340 if (issbaij) { 2341 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2342 } else { 2343 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2344 } 2345 /* Allow user's customization */ 2346 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2347 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2348 } 2349 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2350 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2351 if (!n_R) { 2352 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2353 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2354 } 2355 /* Reuse MUMPS solver if it is present */ 2356 if (sub_schurs->reuse_mumps) { 2357 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2358 2359 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2360 } 2361 /* Set Up KSP for Neumann problem of BDDC */ 2362 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2363 } 2364 /* free Neumann problem's matrix */ 2365 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2366 2367 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2368 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2369 if (pcbddc->dbg_flag) { 2370 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2371 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2372 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2373 } 2374 if (dirichlet) { /* Dirichlet */ 2375 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2376 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2377 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2378 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2379 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2380 /* need to be adapted? */ 2381 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2382 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2383 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2384 /* print info */ 2385 if (pcbddc->dbg_flag) { 2386 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2388 } 2389 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2390 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2391 } 2392 } 2393 if (neumann) { /* Neumann */ 2394 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2395 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2396 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2397 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2398 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2399 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2400 /* need to be adapted? */ 2401 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2402 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2403 /* print info */ 2404 if (pcbddc->dbg_flag) { 2405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2406 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2407 } 2408 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2409 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2410 } 2411 } 2412 } 2413 PetscFunctionReturn(0); 2414 } 2415 2416 #undef __FUNCT__ 2417 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2418 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2419 { 2420 PetscErrorCode ierr; 2421 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2422 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2423 2424 PetscFunctionBegin; 2425 if (!sub_schurs->reuse_mumps) { 2426 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2427 } 2428 if (!pcbddc->switch_static) { 2429 if (applytranspose && pcbddc->local_auxmat1) { 2430 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2431 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2432 } 2433 if (!sub_schurs->reuse_mumps) { 2434 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2435 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2436 } else { 2437 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2438 2439 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2440 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2441 } 2442 } else { 2443 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2444 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2445 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2446 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2447 if (applytranspose && pcbddc->local_auxmat1) { 2448 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2449 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2450 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2451 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2452 } 2453 } 2454 if (!sub_schurs->reuse_mumps) { 2455 if (applytranspose) { 2456 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2457 } else { 2458 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2459 } 2460 #if defined(PETSC_HAVE_MUMPS) 2461 } else { 2462 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2463 2464 if (applytranspose) { 2465 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2466 } else { 2467 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2468 } 2469 #endif 2470 } 2471 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2472 if (!pcbddc->switch_static) { 2473 if (!sub_schurs->reuse_mumps) { 2474 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2475 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2476 } else { 2477 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2478 2479 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2480 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2481 } 2482 if (!applytranspose && pcbddc->local_auxmat1) { 2483 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2484 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2485 } 2486 } else { 2487 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2488 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2489 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2490 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2491 if (!applytranspose && pcbddc->local_auxmat1) { 2492 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2493 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2494 } 2495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2498 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2499 } 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2506 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2507 { 2508 PetscErrorCode ierr; 2509 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2510 PC_IS* pcis = (PC_IS*) (pc->data); 2511 const PetscScalar zero = 0.0; 2512 2513 PetscFunctionBegin; 2514 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2515 if (applytranspose) { 2516 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2517 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2518 } else { 2519 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2520 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2521 } 2522 2523 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2524 if (pcbddc->benign_n) { 2525 PetscScalar *array; 2526 PetscInt j; 2527 2528 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2529 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2530 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2531 } 2532 2533 /* start communications from local primal nodes to rhs of coarse solver */ 2534 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2535 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2536 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2537 2538 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2539 /* TODO remove null space when doing multilevel */ 2540 if (pcbddc->coarse_ksp) { 2541 Mat coarse_mat; 2542 Vec rhs,sol; 2543 MatNullSpace nullsp; 2544 2545 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2546 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2547 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2548 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2549 if (nullsp) { 2550 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2551 } 2552 if (applytranspose) { 2553 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2554 } else { 2555 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2556 } 2557 if (nullsp) { 2558 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2559 } 2560 } 2561 2562 /* Local solution on R nodes */ 2563 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2564 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2565 } 2566 2567 /* communications from coarse sol to local primal nodes */ 2568 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2569 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2570 2571 /* Sum contributions from two levels */ 2572 if (applytranspose) { 2573 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2574 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2575 } else { 2576 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2577 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2578 } 2579 /* store p0 */ 2580 if (pcbddc->benign_n) { 2581 PetscScalar *array; 2582 PetscInt j; 2583 2584 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2585 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2586 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2587 } 2588 PetscFunctionReturn(0); 2589 } 2590 2591 #undef __FUNCT__ 2592 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2593 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2594 { 2595 PetscErrorCode ierr; 2596 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2597 PetscScalar *array; 2598 Vec from,to; 2599 2600 PetscFunctionBegin; 2601 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2602 from = pcbddc->coarse_vec; 2603 to = pcbddc->vec1_P; 2604 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2605 Vec tvec; 2606 2607 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2608 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2609 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2610 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2611 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2612 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2613 } 2614 } else { /* from local to global -> put data in coarse right hand side */ 2615 from = pcbddc->vec1_P; 2616 to = pcbddc->coarse_vec; 2617 } 2618 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 #undef __FUNCT__ 2623 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2624 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2625 { 2626 PetscErrorCode ierr; 2627 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2628 PetscScalar *array; 2629 Vec from,to; 2630 2631 PetscFunctionBegin; 2632 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2633 from = pcbddc->coarse_vec; 2634 to = pcbddc->vec1_P; 2635 } else { /* from local to global -> put data in coarse right hand side */ 2636 from = pcbddc->vec1_P; 2637 to = pcbddc->coarse_vec; 2638 } 2639 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2640 if (smode == SCATTER_FORWARD) { 2641 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2642 Vec tvec; 2643 2644 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2645 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2646 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2647 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2648 } 2649 } else { 2650 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2651 ierr = VecResetArray(from);CHKERRQ(ierr); 2652 } 2653 } 2654 PetscFunctionReturn(0); 2655 } 2656 2657 /* uncomment for testing purposes */ 2658 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2661 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2662 { 2663 PetscErrorCode ierr; 2664 PC_IS* pcis = (PC_IS*)(pc->data); 2665 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2666 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2667 /* one and zero */ 2668 PetscScalar one=1.0,zero=0.0; 2669 /* space to store constraints and their local indices */ 2670 PetscScalar *constraints_data; 2671 PetscInt *constraints_idxs,*constraints_idxs_B; 2672 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2673 PetscInt *constraints_n; 2674 /* iterators */ 2675 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2676 /* BLAS integers */ 2677 PetscBLASInt lwork,lierr; 2678 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2679 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2680 /* reuse */ 2681 PetscInt olocal_primal_size,olocal_primal_size_cc; 2682 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2683 /* change of basis */ 2684 PetscBool qr_needed; 2685 PetscBT change_basis,qr_needed_idx; 2686 /* auxiliary stuff */ 2687 PetscInt *nnz,*is_indices; 2688 PetscInt ncc; 2689 /* some quantities */ 2690 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2691 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2692 2693 PetscFunctionBegin; 2694 /* Destroy Mat objects computed previously */ 2695 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2696 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2697 /* save info on constraints from previous setup (if any) */ 2698 olocal_primal_size = pcbddc->local_primal_size; 2699 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2700 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2701 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2702 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2703 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2704 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2705 2706 /* print some info */ 2707 if (pcbddc->dbg_flag) { 2708 IS vertices; 2709 PetscInt nv,nedges,nfaces; 2710 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2711 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2712 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2713 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2718 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2719 } 2720 2721 if (!pcbddc->adaptive_selection) { 2722 IS ISForVertices,*ISForFaces,*ISForEdges; 2723 MatNullSpace nearnullsp; 2724 const Vec *nearnullvecs; 2725 Vec *localnearnullsp; 2726 PetscScalar *array; 2727 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2728 PetscBool nnsp_has_cnst; 2729 /* LAPACK working arrays for SVD or POD */ 2730 PetscBool skip_lapack,boolforchange; 2731 PetscScalar *work; 2732 PetscReal *singular_vals; 2733 #if defined(PETSC_USE_COMPLEX) 2734 PetscReal *rwork; 2735 #endif 2736 #if defined(PETSC_MISSING_LAPACK_GESVD) 2737 PetscScalar *temp_basis,*correlation_mat; 2738 #else 2739 PetscBLASInt dummy_int=1; 2740 PetscScalar dummy_scalar=1.; 2741 #endif 2742 2743 /* Get index sets for faces, edges and vertices from graph */ 2744 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2745 /* free unneeded index sets */ 2746 if (!pcbddc->use_vertices) { 2747 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2748 } 2749 if (!pcbddc->use_edges) { 2750 for (i=0;i<n_ISForEdges;i++) { 2751 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2752 } 2753 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2754 n_ISForEdges = 0; 2755 } 2756 if (!pcbddc->use_faces) { 2757 for (i=0;i<n_ISForFaces;i++) { 2758 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2759 } 2760 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2761 n_ISForFaces = 0; 2762 } 2763 2764 #if defined(PETSC_USE_DEBUG) 2765 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2766 Also use_change_of_basis should be consistent among processors */ 2767 if (pcbddc->NullSpace) { 2768 PetscBool tbool[2],gbool[2]; 2769 2770 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2771 pcbddc->use_change_of_basis = PETSC_TRUE; 2772 if (!ISForEdges) { 2773 pcbddc->use_change_on_faces = PETSC_TRUE; 2774 } 2775 } 2776 tbool[0] = pcbddc->use_change_of_basis; 2777 tbool[1] = pcbddc->use_change_on_faces; 2778 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2779 pcbddc->use_change_of_basis = gbool[0]; 2780 pcbddc->use_change_on_faces = gbool[1]; 2781 } 2782 #endif 2783 2784 /* check if near null space is attached to global mat */ 2785 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2786 if (nearnullsp) { 2787 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2788 /* remove any stored info */ 2789 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2790 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2791 /* store information for BDDC solver reuse */ 2792 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2793 pcbddc->onearnullspace = nearnullsp; 2794 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2795 for (i=0;i<nnsp_size;i++) { 2796 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2797 } 2798 } else { /* if near null space is not provided BDDC uses constants by default */ 2799 nnsp_size = 0; 2800 nnsp_has_cnst = PETSC_TRUE; 2801 } 2802 /* get max number of constraints on a single cc */ 2803 max_constraints = nnsp_size; 2804 if (nnsp_has_cnst) max_constraints++; 2805 2806 /* 2807 Evaluate maximum storage size needed by the procedure 2808 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2809 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2810 There can be multiple constraints per connected component 2811 */ 2812 n_vertices = 0; 2813 if (ISForVertices) { 2814 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2815 } 2816 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2817 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2818 2819 total_counts = n_ISForFaces+n_ISForEdges; 2820 total_counts *= max_constraints; 2821 total_counts += n_vertices; 2822 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2823 2824 total_counts = 0; 2825 max_size_of_constraint = 0; 2826 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2827 IS used_is; 2828 if (i<n_ISForEdges) { 2829 used_is = ISForEdges[i]; 2830 } else { 2831 used_is = ISForFaces[i-n_ISForEdges]; 2832 } 2833 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2834 total_counts += j; 2835 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2836 } 2837 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2838 2839 /* get local part of global near null space vectors */ 2840 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2841 for (k=0;k<nnsp_size;k++) { 2842 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2843 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2844 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2845 } 2846 2847 /* whether or not to skip lapack calls */ 2848 skip_lapack = PETSC_TRUE; 2849 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2850 2851 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2852 if (!skip_lapack) { 2853 PetscScalar temp_work; 2854 2855 #if defined(PETSC_MISSING_LAPACK_GESVD) 2856 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2857 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2858 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2859 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2860 #if defined(PETSC_USE_COMPLEX) 2861 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2862 #endif 2863 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2864 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2865 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2866 lwork = -1; 2867 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2868 #if !defined(PETSC_USE_COMPLEX) 2869 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2870 #else 2871 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2872 #endif 2873 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2874 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2875 #else /* on missing GESVD */ 2876 /* SVD */ 2877 PetscInt max_n,min_n; 2878 max_n = max_size_of_constraint; 2879 min_n = max_constraints; 2880 if (max_size_of_constraint < max_constraints) { 2881 min_n = max_size_of_constraint; 2882 max_n = max_constraints; 2883 } 2884 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2885 #if defined(PETSC_USE_COMPLEX) 2886 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2887 #endif 2888 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2889 lwork = -1; 2890 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2891 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2892 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2893 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2894 #if !defined(PETSC_USE_COMPLEX) 2895 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2896 #else 2897 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2898 #endif 2899 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2900 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2901 #endif /* on missing GESVD */ 2902 /* Allocate optimal workspace */ 2903 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2905 } 2906 /* Now we can loop on constraining sets */ 2907 total_counts = 0; 2908 constraints_idxs_ptr[0] = 0; 2909 constraints_data_ptr[0] = 0; 2910 /* vertices */ 2911 if (n_vertices) { 2912 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2913 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2914 for (i=0;i<n_vertices;i++) { 2915 constraints_n[total_counts] = 1; 2916 constraints_data[total_counts] = 1.0; 2917 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2918 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2919 total_counts++; 2920 } 2921 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2922 n_vertices = total_counts; 2923 } 2924 2925 /* edges and faces */ 2926 total_counts_cc = total_counts; 2927 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2928 IS used_is; 2929 PetscBool idxs_copied = PETSC_FALSE; 2930 2931 if (ncc<n_ISForEdges) { 2932 used_is = ISForEdges[ncc]; 2933 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2934 } else { 2935 used_is = ISForFaces[ncc-n_ISForEdges]; 2936 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2937 } 2938 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2939 2940 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2941 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2942 /* change of basis should not be performed on local periodic nodes */ 2943 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2944 if (nnsp_has_cnst) { 2945 PetscScalar quad_value; 2946 2947 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2948 idxs_copied = PETSC_TRUE; 2949 2950 if (!pcbddc->use_nnsp_true) { 2951 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2952 } else { 2953 quad_value = 1.0; 2954 } 2955 for (j=0;j<size_of_constraint;j++) { 2956 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2957 } 2958 temp_constraints++; 2959 total_counts++; 2960 } 2961 for (k=0;k<nnsp_size;k++) { 2962 PetscReal real_value; 2963 PetscScalar *ptr_to_data; 2964 2965 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2966 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2967 for (j=0;j<size_of_constraint;j++) { 2968 ptr_to_data[j] = array[is_indices[j]]; 2969 } 2970 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2971 /* check if array is null on the connected component */ 2972 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2973 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2974 if (real_value > 0.0) { /* keep indices and values */ 2975 temp_constraints++; 2976 total_counts++; 2977 if (!idxs_copied) { 2978 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2979 idxs_copied = PETSC_TRUE; 2980 } 2981 } 2982 } 2983 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2984 valid_constraints = temp_constraints; 2985 if (!pcbddc->use_nnsp_true && temp_constraints) { 2986 if (temp_constraints == 1) { /* just normalize the constraint */ 2987 PetscScalar norm,*ptr_to_data; 2988 2989 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2991 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2992 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2993 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2994 } else { /* perform SVD */ 2995 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2996 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2997 2998 #if defined(PETSC_MISSING_LAPACK_GESVD) 2999 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3000 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3001 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3002 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3003 from that computed using LAPACKgesvd 3004 -> This is due to a different computation of eigenvectors in LAPACKheev 3005 -> The quality of the POD-computed basis will be the same */ 3006 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3007 /* Store upper triangular part of correlation matrix */ 3008 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3010 for (j=0;j<temp_constraints;j++) { 3011 for (k=0;k<j+1;k++) { 3012 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3013 } 3014 } 3015 /* compute eigenvalues and eigenvectors of correlation matrix */ 3016 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3017 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3018 #if !defined(PETSC_USE_COMPLEX) 3019 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3020 #else 3021 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3022 #endif 3023 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3025 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3026 j = 0; 3027 while (j < temp_constraints && singular_vals[j] < tol) j++; 3028 total_counts = total_counts-j; 3029 valid_constraints = temp_constraints-j; 3030 /* scale and copy POD basis into used quadrature memory */ 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3034 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3035 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3036 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3037 if (j<temp_constraints) { 3038 PetscInt ii; 3039 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3040 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3041 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3042 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3043 for (k=0;k<temp_constraints-j;k++) { 3044 for (ii=0;ii<size_of_constraint;ii++) { 3045 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3046 } 3047 } 3048 } 3049 #else /* on missing GESVD */ 3050 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3051 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3053 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3054 #if !defined(PETSC_USE_COMPLEX) 3055 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3056 #else 3057 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3058 #endif 3059 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3061 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3062 k = temp_constraints; 3063 if (k > size_of_constraint) k = size_of_constraint; 3064 j = 0; 3065 while (j < k && singular_vals[k-j-1] < tol) j++; 3066 valid_constraints = k-j; 3067 total_counts = total_counts-temp_constraints+valid_constraints; 3068 #endif /* on missing GESVD */ 3069 } 3070 } 3071 /* update pointers information */ 3072 if (valid_constraints) { 3073 constraints_n[total_counts_cc] = valid_constraints; 3074 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3075 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3076 /* set change_of_basis flag */ 3077 if (boolforchange) { 3078 PetscBTSet(change_basis,total_counts_cc); 3079 } 3080 total_counts_cc++; 3081 } 3082 } 3083 /* free workspace */ 3084 if (!skip_lapack) { 3085 ierr = PetscFree(work);CHKERRQ(ierr); 3086 #if defined(PETSC_USE_COMPLEX) 3087 ierr = PetscFree(rwork);CHKERRQ(ierr); 3088 #endif 3089 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3090 #if defined(PETSC_MISSING_LAPACK_GESVD) 3091 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3092 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3093 #endif 3094 } 3095 for (k=0;k<nnsp_size;k++) { 3096 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3097 } 3098 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3099 /* free index sets of faces, edges and vertices */ 3100 for (i=0;i<n_ISForFaces;i++) { 3101 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3102 } 3103 if (n_ISForFaces) { 3104 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3105 } 3106 for (i=0;i<n_ISForEdges;i++) { 3107 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3108 } 3109 if (n_ISForEdges) { 3110 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3111 } 3112 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3113 } else { 3114 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3115 3116 total_counts = 0; 3117 n_vertices = 0; 3118 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3119 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3120 } 3121 max_constraints = 0; 3122 total_counts_cc = 0; 3123 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3124 total_counts += pcbddc->adaptive_constraints_n[i]; 3125 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3126 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3127 } 3128 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3129 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3130 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3131 constraints_data = pcbddc->adaptive_constraints_data; 3132 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3133 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3134 total_counts_cc = 0; 3135 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3136 if (pcbddc->adaptive_constraints_n[i]) { 3137 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3138 } 3139 } 3140 #if 0 3141 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3142 for (i=0;i<total_counts_cc;i++) { 3143 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3144 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3145 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3146 printf(" %d",constraints_idxs[j]); 3147 } 3148 printf("\n"); 3149 printf("number of cc: %d\n",constraints_n[i]); 3150 } 3151 for (i=0;i<n_vertices;i++) { 3152 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3153 } 3154 for (i=0;i<sub_schurs->n_subs;i++) { 3155 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3156 } 3157 #endif 3158 3159 max_size_of_constraint = 0; 3160 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3161 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3162 /* Change of basis */ 3163 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3164 if (pcbddc->use_change_of_basis) { 3165 for (i=0;i<sub_schurs->n_subs;i++) { 3166 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3167 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3168 } 3169 } 3170 } 3171 } 3172 pcbddc->local_primal_size = total_counts; 3173 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3174 3175 /* map constraints_idxs in boundary numbering */ 3176 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3177 if (i != constraints_idxs_ptr[total_counts_cc]) { 3178 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3179 } 3180 3181 /* Create constraint matrix */ 3182 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3183 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3184 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3185 3186 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3187 /* determine if a QR strategy is needed for change of basis */ 3188 qr_needed = PETSC_FALSE; 3189 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3190 total_primal_vertices=0; 3191 pcbddc->local_primal_size_cc = 0; 3192 for (i=0;i<total_counts_cc;i++) { 3193 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3194 if (size_of_constraint == 1) { 3195 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3196 pcbddc->local_primal_size_cc += 1; 3197 } else if (PetscBTLookup(change_basis,i)) { 3198 for (k=0;k<constraints_n[i];k++) { 3199 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3200 } 3201 pcbddc->local_primal_size_cc += constraints_n[i]; 3202 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3203 PetscBTSet(qr_needed_idx,i); 3204 qr_needed = PETSC_TRUE; 3205 } 3206 } else { 3207 pcbddc->local_primal_size_cc += 1; 3208 } 3209 } 3210 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3211 pcbddc->n_vertices = total_primal_vertices; 3212 /* permute indices in order to have a sorted set of vertices */ 3213 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3214 3215 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3216 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3217 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3218 3219 /* nonzero structure of constraint matrix */ 3220 /* and get reference dof for local constraints */ 3221 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3222 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3223 3224 j = total_primal_vertices; 3225 total_counts = total_primal_vertices; 3226 cum = total_primal_vertices; 3227 for (i=n_vertices;i<total_counts_cc;i++) { 3228 if (!PetscBTLookup(change_basis,i)) { 3229 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3230 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3231 cum++; 3232 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3233 for (k=0;k<constraints_n[i];k++) { 3234 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3235 nnz[j+k] = size_of_constraint; 3236 } 3237 j += constraints_n[i]; 3238 } 3239 } 3240 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3241 ierr = PetscFree(nnz);CHKERRQ(ierr); 3242 3243 /* set values in constraint matrix */ 3244 for (i=0;i<total_primal_vertices;i++) { 3245 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3246 } 3247 total_counts = total_primal_vertices; 3248 for (i=n_vertices;i<total_counts_cc;i++) { 3249 if (!PetscBTLookup(change_basis,i)) { 3250 PetscInt *cols; 3251 3252 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3253 cols = constraints_idxs+constraints_idxs_ptr[i]; 3254 for (k=0;k<constraints_n[i];k++) { 3255 PetscInt row = total_counts+k; 3256 PetscScalar *vals; 3257 3258 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3259 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3260 } 3261 total_counts += constraints_n[i]; 3262 } 3263 } 3264 /* assembling */ 3265 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 3268 /* 3269 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3270 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3271 */ 3272 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3273 if (pcbddc->use_change_of_basis) { 3274 /* dual and primal dofs on a single cc */ 3275 PetscInt dual_dofs,primal_dofs; 3276 /* working stuff for GEQRF */ 3277 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3278 PetscBLASInt lqr_work; 3279 /* working stuff for UNGQR */ 3280 PetscScalar *gqr_work,lgqr_work_t; 3281 PetscBLASInt lgqr_work; 3282 /* working stuff for TRTRS */ 3283 PetscScalar *trs_rhs; 3284 PetscBLASInt Blas_NRHS; 3285 /* pointers for values insertion into change of basis matrix */ 3286 PetscInt *start_rows,*start_cols; 3287 PetscScalar *start_vals; 3288 /* working stuff for values insertion */ 3289 PetscBT is_primal; 3290 PetscInt *aux_primal_numbering_B; 3291 /* matrix sizes */ 3292 PetscInt global_size,local_size; 3293 /* temporary change of basis */ 3294 Mat localChangeOfBasisMatrix; 3295 /* extra space for debugging */ 3296 PetscScalar *dbg_work; 3297 3298 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3299 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3300 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3301 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3302 /* nonzeros for local mat */ 3303 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3304 for (i=0;i<pcis->n;i++) nnz[i]=1; 3305 for (i=n_vertices;i<total_counts_cc;i++) { 3306 if (PetscBTLookup(change_basis,i)) { 3307 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3308 if (PetscBTLookup(qr_needed_idx,i)) { 3309 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3310 } else { 3311 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3312 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3313 } 3314 } 3315 } 3316 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3317 ierr = PetscFree(nnz);CHKERRQ(ierr); 3318 /* Set initial identity in the matrix */ 3319 for (i=0;i<pcis->n;i++) { 3320 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3321 } 3322 3323 if (pcbddc->dbg_flag) { 3324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3326 } 3327 3328 3329 /* Now we loop on the constraints which need a change of basis */ 3330 /* 3331 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3332 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3333 3334 Basic blocks of change of basis matrix T computed by 3335 3336 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3337 3338 | 1 0 ... 0 s_1/S | 3339 | 0 1 ... 0 s_2/S | 3340 | ... | 3341 | 0 ... 1 s_{n-1}/S | 3342 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3343 3344 with S = \sum_{i=1}^n s_i^2 3345 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3346 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3347 3348 - QR decomposition of constraints otherwise 3349 */ 3350 if (qr_needed) { 3351 /* space to store Q */ 3352 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3353 /* first we issue queries for optimal work */ 3354 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3355 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3356 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3357 lqr_work = -1; 3358 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3359 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3360 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3361 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3362 lgqr_work = -1; 3363 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3364 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3365 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3366 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3367 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3368 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3369 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3370 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3371 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3372 /* array to store scaling factors for reflectors */ 3373 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3374 /* array to store rhs and solution of triangular solver */ 3375 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3376 /* allocating workspace for check */ 3377 if (pcbddc->dbg_flag) { 3378 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3379 } 3380 } 3381 /* array to store whether a node is primal or not */ 3382 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3383 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3384 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3385 if (i != total_primal_vertices) { 3386 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3387 } 3388 for (i=0;i<total_primal_vertices;i++) { 3389 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3390 } 3391 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3392 3393 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3394 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3395 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3396 if (PetscBTLookup(change_basis,total_counts)) { 3397 /* get constraint info */ 3398 primal_dofs = constraints_n[total_counts]; 3399 dual_dofs = size_of_constraint-primal_dofs; 3400 3401 if (pcbddc->dbg_flag) { 3402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3403 } 3404 3405 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3406 3407 /* copy quadrature constraints for change of basis check */ 3408 if (pcbddc->dbg_flag) { 3409 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3410 } 3411 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3412 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3413 3414 /* compute QR decomposition of constraints */ 3415 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3416 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3418 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3419 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3420 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3421 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3422 3423 /* explictly compute R^-T */ 3424 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3425 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3426 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3427 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3428 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3429 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3430 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3431 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3432 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3433 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3434 3435 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3438 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3439 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3440 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3441 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3442 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3443 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3444 3445 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3446 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3447 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3450 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3451 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3452 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3453 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3454 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3455 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3456 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3457 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3458 3459 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3460 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3461 /* insert cols for primal dofs */ 3462 for (j=0;j<primal_dofs;j++) { 3463 start_vals = &qr_basis[j*size_of_constraint]; 3464 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3465 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3466 } 3467 /* insert cols for dual dofs */ 3468 for (j=0,k=0;j<dual_dofs;k++) { 3469 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3470 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3471 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3472 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3473 j++; 3474 } 3475 } 3476 3477 /* check change of basis */ 3478 if (pcbddc->dbg_flag) { 3479 PetscInt ii,jj; 3480 PetscBool valid_qr=PETSC_TRUE; 3481 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3483 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3486 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3487 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3488 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3490 for (jj=0;jj<size_of_constraint;jj++) { 3491 for (ii=0;ii<primal_dofs;ii++) { 3492 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3493 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3494 } 3495 } 3496 if (!valid_qr) { 3497 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3498 for (jj=0;jj<size_of_constraint;jj++) { 3499 for (ii=0;ii<primal_dofs;ii++) { 3500 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3501 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3502 } 3503 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3504 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3505 } 3506 } 3507 } 3508 } else { 3509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3510 } 3511 } 3512 } else { /* simple transformation block */ 3513 PetscInt row,col; 3514 PetscScalar val,norm; 3515 3516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3517 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3518 for (j=0;j<size_of_constraint;j++) { 3519 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3520 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3521 if (!PetscBTLookup(is_primal,row_B)) { 3522 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3523 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3524 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3525 } else { 3526 for (k=0;k<size_of_constraint;k++) { 3527 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3528 if (row != col) { 3529 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3530 } else { 3531 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3532 } 3533 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3534 } 3535 } 3536 } 3537 if (pcbddc->dbg_flag) { 3538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3539 } 3540 } 3541 } else { 3542 if (pcbddc->dbg_flag) { 3543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3544 } 3545 } 3546 } 3547 3548 /* free workspace */ 3549 if (qr_needed) { 3550 if (pcbddc->dbg_flag) { 3551 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3552 } 3553 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3554 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3555 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3556 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3557 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3558 } 3559 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3560 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3561 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 3563 /* assembling of global change of variable */ 3564 { 3565 Mat tmat; 3566 PetscInt bs; 3567 3568 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3569 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3570 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3571 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3572 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3573 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3574 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3575 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3576 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3577 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3578 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3579 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3580 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3581 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3582 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3583 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3584 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3585 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3586 } 3587 /* check */ 3588 if (pcbddc->dbg_flag) { 3589 PetscReal error; 3590 Vec x,x_change; 3591 3592 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3593 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3594 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3595 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3596 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3597 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3598 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3599 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3600 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3601 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3602 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3603 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3604 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3605 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3606 ierr = VecDestroy(&x);CHKERRQ(ierr); 3607 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3608 } 3609 3610 /* adapt sub_schurs computed (if any) */ 3611 if (pcbddc->use_deluxe_scaling) { 3612 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3613 if (sub_schurs->S_Ej_all) { 3614 Mat S_new,tmat; 3615 ISLocalToGlobalMapping NtoSall; 3616 IS is_all_N,is_V,is_V_Sall; 3617 const PetscScalar *array; 3618 const PetscInt *idxs_V,*idxs_all; 3619 PetscInt i,n_V; 3620 3621 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3622 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3623 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3624 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3625 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3626 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3627 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3628 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3629 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3630 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3631 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3632 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3633 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3634 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3635 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3636 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3637 for (i=0;i<n_V;i++) { 3638 PetscScalar val; 3639 PetscInt idx; 3640 3641 idx = idxs_V[i]; 3642 val = array[idxs_all[idxs_V[i]]]; 3643 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3644 } 3645 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3646 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3647 sub_schurs->S_Ej_all = S_new; 3648 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3649 if (sub_schurs->sum_S_Ej_all) { 3650 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3651 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3652 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3653 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3654 sub_schurs->sum_S_Ej_all = S_new; 3655 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3656 } 3657 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3658 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3659 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3660 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3661 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3662 } 3663 } 3664 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3665 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3666 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3667 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3668 } 3669 3670 /* set up change of basis context */ 3671 if (pcbddc->ChangeOfBasisMatrix) { 3672 PCBDDCChange_ctx change_ctx; 3673 3674 if (!pcbddc->new_global_mat) { 3675 PetscInt global_size,local_size; 3676 3677 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3678 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3679 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3680 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3681 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3682 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3683 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3684 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3685 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3686 } else { 3687 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3688 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3689 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3690 } 3691 if (!pcbddc->user_ChangeOfBasisMatrix) { 3692 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3693 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3694 } else { 3695 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3696 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3697 } 3698 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3699 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3700 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 } 3703 3704 /* add pressure dofs to set of primal nodes for numbering purposes */ 3705 for (i=0;i<pcbddc->benign_n;i++) { 3706 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3707 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3708 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3709 pcbddc->local_primal_size_cc++; 3710 pcbddc->local_primal_size++; 3711 } 3712 3713 /* check if a new primal space has been introduced (also take into account benign trick) */ 3714 pcbddc->new_primal_space_local = PETSC_TRUE; 3715 if (olocal_primal_size == pcbddc->local_primal_size) { 3716 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3717 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3718 if (!pcbddc->new_primal_space_local) { 3719 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3720 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3721 } 3722 } 3723 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3724 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3725 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3726 3727 /* flush dbg viewer */ 3728 if (pcbddc->dbg_flag) { 3729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3730 } 3731 3732 /* free workspace */ 3733 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3734 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3735 if (!pcbddc->adaptive_selection) { 3736 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3737 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3738 } else { 3739 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3740 pcbddc->adaptive_constraints_idxs_ptr, 3741 pcbddc->adaptive_constraints_data_ptr, 3742 pcbddc->adaptive_constraints_idxs, 3743 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3744 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3745 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3746 } 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3752 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3753 { 3754 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3755 PC_IS *pcis = (PC_IS*)pc->data; 3756 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3757 PetscInt ierr,i,vertex_size,N; 3758 PetscViewer viewer=pcbddc->dbg_viewer; 3759 3760 PetscFunctionBegin; 3761 /* Reset previously computed graph */ 3762 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3763 /* Init local Graph struct */ 3764 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3765 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3766 3767 /* Check validity of the csr graph passed in by the user */ 3768 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3769 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3770 } 3771 3772 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3773 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3774 PetscInt *xadj,*adjncy; 3775 PetscInt nvtxs; 3776 PetscBool flg_row=PETSC_FALSE; 3777 3778 if (pcbddc->use_local_adj) { 3779 3780 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3781 if (flg_row) { 3782 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3783 pcbddc->computed_rowadj = PETSC_TRUE; 3784 } 3785 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3786 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3787 IS is_dummy; 3788 ISLocalToGlobalMapping l2gmap_dummy; 3789 PetscInt j,sum; 3790 PetscInt *cxadj,*cadjncy; 3791 const PetscInt *idxs; 3792 PCBDDCGraph graph; 3793 PetscBT is_on_boundary; 3794 3795 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3796 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3797 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3798 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3799 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3800 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3801 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3802 if (flg_row) { 3803 graph->xadj = xadj; 3804 graph->adjncy = adjncy; 3805 } 3806 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3807 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3808 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3809 3810 if (pcbddc->dbg_flag) { 3811 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3812 for (i=0;i<graph->ncc;i++) { 3813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3814 } 3815 } 3816 3817 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3818 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3819 for (i=0;i<pcis->n_B;i++) { 3820 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3821 } 3822 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3823 3824 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3825 sum = 0; 3826 for (i=0;i<graph->ncc;i++) { 3827 PetscInt sizecc = 0; 3828 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3829 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3830 sizecc++; 3831 } 3832 } 3833 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3834 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3835 cxadj[graph->queue[j]] = sizecc; 3836 } 3837 } 3838 sum += sizecc*sizecc; 3839 } 3840 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3841 sum = 0; 3842 for (i=0;i<pcis->n;i++) { 3843 PetscInt temp = cxadj[i]; 3844 cxadj[i] = sum; 3845 sum += temp; 3846 } 3847 cxadj[pcis->n] = sum; 3848 for (i=0;i<graph->ncc;i++) { 3849 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3850 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3851 PetscInt k,sizecc = 0; 3852 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3853 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3854 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3855 sizecc++; 3856 } 3857 } 3858 } 3859 } 3860 } 3861 if (sum) { 3862 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3863 } else { 3864 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3865 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3866 } 3867 graph->xadj = 0; 3868 graph->adjncy = 0; 3869 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3870 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3871 } 3872 } 3873 if (pcbddc->dbg_flag) { 3874 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3875 } 3876 3877 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3878 vertex_size = 1; 3879 if (pcbddc->user_provided_isfordofs) { 3880 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3881 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3882 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3883 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3884 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3885 } 3886 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3887 pcbddc->n_ISForDofs = 0; 3888 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3889 } 3890 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3891 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3892 } else { 3893 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3894 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3895 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3896 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3897 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3898 } 3899 } 3900 } 3901 3902 /* Setup of Graph */ 3903 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3904 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3905 } 3906 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3907 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3908 } 3909 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3910 3911 /* attach info on disconnected subdomains if present */ 3912 if (pcbddc->n_local_subs) { 3913 PetscInt *local_subs; 3914 3915 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3916 for (i=0;i<pcbddc->n_local_subs;i++) { 3917 const PetscInt *idxs; 3918 PetscInt nl,j; 3919 3920 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3921 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3922 for (j=0;j<nl;j++) { 3923 local_subs[idxs[j]] = i; 3924 } 3925 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3926 } 3927 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3928 pcbddc->mat_graph->local_subs = local_subs; 3929 } 3930 3931 /* Graph's connected components analysis */ 3932 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3933 3934 /* print some info to stdout */ 3935 if (pcbddc->dbg_flag) { 3936 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3937 } 3938 3939 /* mark topography has done */ 3940 pcbddc->recompute_topography = PETSC_FALSE; 3941 PetscFunctionReturn(0); 3942 } 3943 3944 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "PCBDDCSubsetNumbering" 3947 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3948 { 3949 PetscSF sf; 3950 PetscLayout map; 3951 const PetscInt *idxs; 3952 PetscInt *leaf_data,*root_data,*gidxs; 3953 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3954 PetscInt n_n,nlocals,start,first_index; 3955 PetscMPIInt commsize; 3956 PetscBool first_found; 3957 PetscErrorCode ierr; 3958 3959 PetscFunctionBegin; 3960 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3961 if (subset_mult) { 3962 PetscCheckSameComm(subset,1,subset_mult,2); 3963 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3964 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3965 } 3966 /* create workspace layout for computing global indices of subset */ 3967 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3968 lbounds[0] = lbounds[1] = 0; 3969 for (i=0;i<n;i++) { 3970 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3971 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3972 } 3973 lbounds[0] = -lbounds[0]; 3974 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3975 gbounds[0] = -gbounds[0]; 3976 N = gbounds[1] - gbounds[0] + 1; 3977 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3978 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3979 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3980 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3981 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3982 3983 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3984 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3985 if (subset_mult) { 3986 const PetscInt* idxs_mult; 3987 3988 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3989 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3990 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3991 } else { 3992 for (i=0;i<n;i++) leaf_data[i] = 1; 3993 } 3994 /* local size of new subset */ 3995 n_n = 0; 3996 for (i=0;i<n;i++) n_n += leaf_data[i]; 3997 3998 /* global indexes in layout */ 3999 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4000 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4001 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4002 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4003 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4004 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4005 4006 /* reduce from leaves to roots */ 4007 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4008 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4009 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4010 4011 /* count indexes in local part of layout */ 4012 nlocals = 0; 4013 first_index = -1; 4014 first_found = PETSC_FALSE; 4015 for (i=0;i<Nl;i++) { 4016 if (!first_found && root_data[i]) { 4017 first_found = PETSC_TRUE; 4018 first_index = i; 4019 } 4020 nlocals += root_data[i]; 4021 } 4022 4023 /* cumulative of number of indexes and size of subset without holes */ 4024 #if defined(PETSC_HAVE_MPI_EXSCAN) 4025 start = 0; 4026 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4027 #else 4028 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4029 start = start-nlocals; 4030 #endif 4031 4032 if (N_n) { /* compute total size of new subset if requested */ 4033 *N_n = start + nlocals; 4034 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4035 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4036 } 4037 4038 /* adapt root data with cumulative */ 4039 if (first_found) { 4040 PetscInt old_index; 4041 4042 root_data[first_index] += start; 4043 old_index = first_index; 4044 for (i=first_index+1;i<Nl;i++) { 4045 if (root_data[i]) { 4046 root_data[i] += root_data[old_index]; 4047 old_index = i; 4048 } 4049 } 4050 } 4051 4052 /* from roots to leaves */ 4053 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4054 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4055 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4056 4057 /* create new IS with global indexes without holes */ 4058 if (subset_mult) { 4059 const PetscInt* idxs_mult; 4060 PetscInt cum; 4061 4062 cum = 0; 4063 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4064 for (i=0;i<n;i++) { 4065 PetscInt j; 4066 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4067 } 4068 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4069 } else { 4070 for (i=0;i<n;i++) { 4071 gidxs[i] = leaf_data[i]-1; 4072 } 4073 } 4074 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4075 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4076 PetscFunctionReturn(0); 4077 } 4078 4079 #undef __FUNCT__ 4080 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4081 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4082 { 4083 PetscInt i,j; 4084 PetscScalar *alphas; 4085 PetscErrorCode ierr; 4086 4087 PetscFunctionBegin; 4088 /* this implements stabilized Gram-Schmidt */ 4089 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4090 for (i=0;i<n;i++) { 4091 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4092 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4093 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4094 } 4095 ierr = PetscFree(alphas);CHKERRQ(ierr); 4096 PetscFunctionReturn(0); 4097 } 4098 4099 #undef __FUNCT__ 4100 #define __FUNCT__ "MatISGetSubassemblingPattern" 4101 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4102 { 4103 IS ranks_send_to; 4104 PetscInt n_neighs,*neighs,*n_shared,**shared; 4105 PetscMPIInt size,rank,color; 4106 PetscInt *xadj,*adjncy; 4107 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4108 PetscInt i,local_size,threshold=0; 4109 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4110 PetscSubcomm subcomm; 4111 PetscErrorCode ierr; 4112 4113 PetscFunctionBegin; 4114 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4115 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4116 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4117 4118 /* Get info on mapping */ 4119 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4120 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4121 4122 /* build local CSR graph of subdomains' connectivity */ 4123 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4124 xadj[0] = 0; 4125 xadj[1] = PetscMax(n_neighs-1,0); 4126 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4127 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4128 4129 if (threshold) { 4130 PetscInt xadj_count = 0; 4131 for (i=1;i<n_neighs;i++) { 4132 if (n_shared[i] > threshold) { 4133 adjncy[xadj_count] = neighs[i]; 4134 adjncy_wgt[xadj_count] = n_shared[i]; 4135 xadj_count++; 4136 } 4137 } 4138 xadj[1] = xadj_count; 4139 } else { 4140 if (xadj[1]) { 4141 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4142 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4143 } 4144 } 4145 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4146 if (use_square) { 4147 for (i=0;i<xadj[1];i++) { 4148 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4149 } 4150 } 4151 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4152 4153 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4154 4155 /* 4156 Restrict work on active processes only. 4157 */ 4158 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4159 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4160 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4161 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4162 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4163 if (color) { 4164 ierr = PetscFree(xadj);CHKERRQ(ierr); 4165 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4166 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4167 } else { 4168 Mat subdomain_adj; 4169 IS new_ranks,new_ranks_contig; 4170 MatPartitioning partitioner; 4171 PetscInt prank,rstart=0,rend=0; 4172 PetscInt *is_indices,*oldranks; 4173 PetscBool aggregate; 4174 4175 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4176 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4177 prank = rank; 4178 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4179 /* 4180 for (i=0;i<size;i++) { 4181 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4182 } 4183 */ 4184 for (i=0;i<xadj[1];i++) { 4185 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4186 } 4187 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4188 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4189 if (aggregate) { 4190 PetscInt lrows,row,ncols,*cols; 4191 PetscMPIInt nrank; 4192 PetscScalar *vals; 4193 4194 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4195 lrows = 0; 4196 if (nrank<redprocs) { 4197 lrows = size/redprocs; 4198 if (nrank<size%redprocs) lrows++; 4199 } 4200 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4201 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4202 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4203 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4204 row = nrank; 4205 ncols = xadj[1]-xadj[0]; 4206 cols = adjncy; 4207 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4208 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4209 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4210 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4211 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4212 ierr = PetscFree(xadj);CHKERRQ(ierr); 4213 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4214 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4215 ierr = PetscFree(vals);CHKERRQ(ierr); 4216 } else { 4217 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4218 } 4219 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4220 4221 /* Partition */ 4222 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4223 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4224 if (use_vwgt) { 4225 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4226 v_wgt[0] = local_size; 4227 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4228 } 4229 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4230 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4231 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4232 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4233 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4234 4235 /* renumber new_ranks to avoid "holes" in new set of processors */ 4236 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4237 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4238 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4239 if (!redprocs) { 4240 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4241 } else { 4242 PetscInt idxs[1]; 4243 PetscMPIInt tag; 4244 MPI_Request *reqs; 4245 4246 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4247 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4248 for (i=rstart;i<rend;i++) { 4249 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4250 } 4251 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4252 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4253 ierr = PetscFree(reqs);CHKERRQ(ierr); 4254 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4255 } 4256 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4257 /* clean up */ 4258 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4259 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4260 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4261 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4262 } 4263 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4264 4265 /* assemble parallel IS for sends */ 4266 i = 1; 4267 if (color) i=0; 4268 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4269 /* get back IS */ 4270 *is_sends = ranks_send_to; 4271 PetscFunctionReturn(0); 4272 } 4273 4274 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4275 4276 #undef __FUNCT__ 4277 #define __FUNCT__ "MatISSubassemble" 4278 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4279 { 4280 Mat local_mat; 4281 IS is_sends_internal; 4282 PetscInt rows,cols,new_local_rows; 4283 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4284 PetscBool ismatis,isdense,newisdense,destroy_mat; 4285 ISLocalToGlobalMapping l2gmap; 4286 PetscInt* l2gmap_indices; 4287 const PetscInt* is_indices; 4288 MatType new_local_type; 4289 /* buffers */ 4290 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4291 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4292 PetscInt *recv_buffer_idxs_local; 4293 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4294 /* MPI */ 4295 MPI_Comm comm,comm_n; 4296 PetscSubcomm subcomm; 4297 PetscMPIInt n_sends,n_recvs,commsize; 4298 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4299 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4300 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4301 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4302 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4303 PetscErrorCode ierr; 4304 4305 PetscFunctionBegin; 4306 /* TODO: add missing checks */ 4307 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4308 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4309 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4310 PetscValidLogicalCollectiveInt(mat,nis,7); 4311 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4312 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4313 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4314 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4315 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4316 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4317 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4318 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4319 PetscInt mrows,mcols,mnrows,mncols; 4320 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4321 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4322 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4323 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4324 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4325 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4326 } 4327 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4328 PetscValidLogicalCollectiveInt(mat,bs,0); 4329 /* prepare IS for sending if not provided */ 4330 if (!is_sends) { 4331 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4332 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4333 } else { 4334 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4335 is_sends_internal = is_sends; 4336 } 4337 4338 /* get comm */ 4339 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4340 4341 /* compute number of sends */ 4342 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4343 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4344 4345 /* compute number of receives */ 4346 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4347 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4348 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4349 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4350 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4351 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4352 ierr = PetscFree(iflags);CHKERRQ(ierr); 4353 4354 /* restrict comm if requested */ 4355 subcomm = 0; 4356 destroy_mat = PETSC_FALSE; 4357 if (restrict_comm) { 4358 PetscMPIInt color,subcommsize; 4359 4360 color = 0; 4361 if (restrict_full) { 4362 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4363 } else { 4364 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4365 } 4366 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4367 subcommsize = commsize - subcommsize; 4368 /* check if reuse has been requested */ 4369 if (reuse == MAT_REUSE_MATRIX) { 4370 if (*mat_n) { 4371 PetscMPIInt subcommsize2; 4372 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4373 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4374 comm_n = PetscObjectComm((PetscObject)*mat_n); 4375 } else { 4376 comm_n = PETSC_COMM_SELF; 4377 } 4378 } else { /* MAT_INITIAL_MATRIX */ 4379 PetscMPIInt rank; 4380 4381 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4382 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4383 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4384 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4385 comm_n = PetscSubcommChild(subcomm); 4386 } 4387 /* flag to destroy *mat_n if not significative */ 4388 if (color) destroy_mat = PETSC_TRUE; 4389 } else { 4390 comm_n = comm; 4391 } 4392 4393 /* prepare send/receive buffers */ 4394 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4395 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4396 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4397 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4398 if (nis) { 4399 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4400 } 4401 4402 /* Get data from local matrices */ 4403 if (!isdense) { 4404 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4405 /* TODO: See below some guidelines on how to prepare the local buffers */ 4406 /* 4407 send_buffer_vals should contain the raw values of the local matrix 4408 send_buffer_idxs should contain: 4409 - MatType_PRIVATE type 4410 - PetscInt size_of_l2gmap 4411 - PetscInt global_row_indices[size_of_l2gmap] 4412 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4413 */ 4414 } else { 4415 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4416 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4418 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4419 send_buffer_idxs[1] = i; 4420 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4421 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4422 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4423 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4424 for (i=0;i<n_sends;i++) { 4425 ilengths_vals[is_indices[i]] = len*len; 4426 ilengths_idxs[is_indices[i]] = len+2; 4427 } 4428 } 4429 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4430 /* additional is (if any) */ 4431 if (nis) { 4432 PetscMPIInt psum; 4433 PetscInt j; 4434 for (j=0,psum=0;j<nis;j++) { 4435 PetscInt plen; 4436 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4437 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4438 psum += len+1; /* indices + lenght */ 4439 } 4440 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4441 for (j=0,psum=0;j<nis;j++) { 4442 PetscInt plen; 4443 const PetscInt *is_array_idxs; 4444 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4445 send_buffer_idxs_is[psum] = plen; 4446 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4447 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4448 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4449 psum += plen+1; /* indices + lenght */ 4450 } 4451 for (i=0;i<n_sends;i++) { 4452 ilengths_idxs_is[is_indices[i]] = psum; 4453 } 4454 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4455 } 4456 4457 buf_size_idxs = 0; 4458 buf_size_vals = 0; 4459 buf_size_idxs_is = 0; 4460 for (i=0;i<n_recvs;i++) { 4461 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4462 buf_size_vals += (PetscInt)olengths_vals[i]; 4463 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4464 } 4465 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4466 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4467 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4468 4469 /* get new tags for clean communications */ 4470 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4471 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4472 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4473 4474 /* allocate for requests */ 4475 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4476 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4477 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4478 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4479 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4480 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4481 4482 /* communications */ 4483 ptr_idxs = recv_buffer_idxs; 4484 ptr_vals = recv_buffer_vals; 4485 ptr_idxs_is = recv_buffer_idxs_is; 4486 for (i=0;i<n_recvs;i++) { 4487 source_dest = onodes[i]; 4488 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4489 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4490 ptr_idxs += olengths_idxs[i]; 4491 ptr_vals += olengths_vals[i]; 4492 if (nis) { 4493 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4494 ptr_idxs_is += olengths_idxs_is[i]; 4495 } 4496 } 4497 for (i=0;i<n_sends;i++) { 4498 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4499 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4500 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4501 if (nis) { 4502 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4503 } 4504 } 4505 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4506 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4507 4508 /* assemble new l2g map */ 4509 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4510 ptr_idxs = recv_buffer_idxs; 4511 new_local_rows = 0; 4512 for (i=0;i<n_recvs;i++) { 4513 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4514 ptr_idxs += olengths_idxs[i]; 4515 } 4516 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4517 ptr_idxs = recv_buffer_idxs; 4518 new_local_rows = 0; 4519 for (i=0;i<n_recvs;i++) { 4520 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4521 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4522 ptr_idxs += olengths_idxs[i]; 4523 } 4524 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4525 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4526 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4527 4528 /* infer new local matrix type from received local matrices type */ 4529 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4530 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4531 if (n_recvs) { 4532 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4533 ptr_idxs = recv_buffer_idxs; 4534 for (i=0;i<n_recvs;i++) { 4535 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4536 new_local_type_private = MATAIJ_PRIVATE; 4537 break; 4538 } 4539 ptr_idxs += olengths_idxs[i]; 4540 } 4541 switch (new_local_type_private) { 4542 case MATDENSE_PRIVATE: 4543 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4544 new_local_type = MATSEQAIJ; 4545 bs = 1; 4546 } else { /* if I receive only 1 dense matrix */ 4547 new_local_type = MATSEQDENSE; 4548 bs = 1; 4549 } 4550 break; 4551 case MATAIJ_PRIVATE: 4552 new_local_type = MATSEQAIJ; 4553 bs = 1; 4554 break; 4555 case MATBAIJ_PRIVATE: 4556 new_local_type = MATSEQBAIJ; 4557 break; 4558 case MATSBAIJ_PRIVATE: 4559 new_local_type = MATSEQSBAIJ; 4560 break; 4561 default: 4562 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4563 break; 4564 } 4565 } else { /* by default, new_local_type is seqdense */ 4566 new_local_type = MATSEQDENSE; 4567 bs = 1; 4568 } 4569 4570 /* create MATIS object if needed */ 4571 if (reuse == MAT_INITIAL_MATRIX) { 4572 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4573 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4574 } else { 4575 /* it also destroys the local matrices */ 4576 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4577 } 4578 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4579 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4580 4581 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4582 4583 /* Global to local map of received indices */ 4584 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4585 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4586 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4587 4588 /* restore attributes -> type of incoming data and its size */ 4589 buf_size_idxs = 0; 4590 for (i=0;i<n_recvs;i++) { 4591 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4592 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4593 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4594 } 4595 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4596 4597 /* set preallocation */ 4598 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4599 if (!newisdense) { 4600 PetscInt *new_local_nnz=0; 4601 4602 ptr_vals = recv_buffer_vals; 4603 ptr_idxs = recv_buffer_idxs_local; 4604 if (n_recvs) { 4605 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4606 } 4607 for (i=0;i<n_recvs;i++) { 4608 PetscInt j; 4609 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4610 for (j=0;j<*(ptr_idxs+1);j++) { 4611 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4612 } 4613 } else { 4614 /* TODO */ 4615 } 4616 ptr_idxs += olengths_idxs[i]; 4617 } 4618 if (new_local_nnz) { 4619 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4620 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4621 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4622 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4623 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4624 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4625 } else { 4626 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4627 } 4628 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4629 } else { 4630 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4631 } 4632 4633 /* set values */ 4634 ptr_vals = recv_buffer_vals; 4635 ptr_idxs = recv_buffer_idxs_local; 4636 for (i=0;i<n_recvs;i++) { 4637 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4638 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4639 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4640 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4641 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4642 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4643 } else { 4644 /* TODO */ 4645 } 4646 ptr_idxs += olengths_idxs[i]; 4647 ptr_vals += olengths_vals[i]; 4648 } 4649 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4650 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4651 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4652 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4653 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4654 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4655 4656 #if 0 4657 if (!restrict_comm) { /* check */ 4658 Vec lvec,rvec; 4659 PetscReal infty_error; 4660 4661 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4662 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4663 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4664 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4665 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4666 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4667 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4668 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4669 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4670 } 4671 #endif 4672 4673 /* assemble new additional is (if any) */ 4674 if (nis) { 4675 PetscInt **temp_idxs,*count_is,j,psum; 4676 4677 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4678 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4679 ptr_idxs = recv_buffer_idxs_is; 4680 psum = 0; 4681 for (i=0;i<n_recvs;i++) { 4682 for (j=0;j<nis;j++) { 4683 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4684 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4685 psum += plen; 4686 ptr_idxs += plen+1; /* shift pointer to received data */ 4687 } 4688 } 4689 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4690 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4691 for (i=1;i<nis;i++) { 4692 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4693 } 4694 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4695 ptr_idxs = recv_buffer_idxs_is; 4696 for (i=0;i<n_recvs;i++) { 4697 for (j=0;j<nis;j++) { 4698 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4699 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4700 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4701 ptr_idxs += plen+1; /* shift pointer to received data */ 4702 } 4703 } 4704 for (i=0;i<nis;i++) { 4705 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4706 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4707 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4708 } 4709 ierr = PetscFree(count_is);CHKERRQ(ierr); 4710 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4711 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4712 } 4713 /* free workspace */ 4714 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4715 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4716 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4717 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4718 if (isdense) { 4719 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4720 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4721 } else { 4722 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4723 } 4724 if (nis) { 4725 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4726 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4727 } 4728 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4729 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4730 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4731 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4732 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4733 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4734 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4735 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4736 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4737 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4738 ierr = PetscFree(onodes);CHKERRQ(ierr); 4739 if (nis) { 4740 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4741 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4742 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4743 } 4744 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4745 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4746 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4747 for (i=0;i<nis;i++) { 4748 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4749 } 4750 *mat_n = NULL; 4751 } 4752 PetscFunctionReturn(0); 4753 } 4754 4755 /* temporary hack into ksp private data structure */ 4756 #include <petsc/private/kspimpl.h> 4757 4758 #undef __FUNCT__ 4759 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4760 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4761 { 4762 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4763 PC_IS *pcis = (PC_IS*)pc->data; 4764 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4765 MatNullSpace CoarseNullSpace=NULL; 4766 ISLocalToGlobalMapping coarse_islg; 4767 IS coarse_is,*isarray; 4768 PetscInt i,im_active=-1,active_procs=-1; 4769 PetscInt nis,nisdofs,nisneu; 4770 PC pc_temp; 4771 PCType coarse_pc_type; 4772 KSPType coarse_ksp_type; 4773 PetscBool multilevel_requested,multilevel_allowed; 4774 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4775 Mat t_coarse_mat_is; 4776 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4777 PetscMPIInt all_procs; 4778 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4779 PetscBool compute_vecs = PETSC_FALSE; 4780 PetscScalar *array; 4781 PetscErrorCode ierr; 4782 4783 PetscFunctionBegin; 4784 /* Assign global numbering to coarse dofs */ 4785 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4786 PetscInt ocoarse_size; 4787 compute_vecs = PETSC_TRUE; 4788 ocoarse_size = pcbddc->coarse_size; 4789 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4790 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4791 /* see if we can avoid some work */ 4792 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4793 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4794 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4795 PC pc; 4796 PetscBool isbddc; 4797 4798 /* temporary workaround since PCBDDC does not have a reset method so far */ 4799 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4800 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4801 if (isbddc) { 4802 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4803 } 4804 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4805 coarse_reuse = PETSC_FALSE; 4806 } else { /* we can safely reuse already computed coarse matrix */ 4807 coarse_reuse = PETSC_TRUE; 4808 } 4809 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4810 coarse_reuse = PETSC_FALSE; 4811 } 4812 /* reset any subassembling information */ 4813 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4814 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4815 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4816 coarse_reuse = PETSC_TRUE; 4817 } 4818 4819 /* count "active" (i.e. with positive local size) and "void" processes */ 4820 im_active = !!(pcis->n); 4821 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4822 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4823 void_procs = all_procs-active_procs; 4824 csin_type_simple = PETSC_TRUE; 4825 redist = PETSC_FALSE; 4826 if (pcbddc->current_level && void_procs) { 4827 csin_ml = PETSC_TRUE; 4828 ncoarse_ml = void_procs; 4829 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4830 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4831 csin_ds = PETSC_TRUE; 4832 ncoarse_ds = pcbddc->redistribute_coarse; 4833 redist = PETSC_TRUE; 4834 } else { 4835 csin_ds = PETSC_TRUE; 4836 ncoarse_ds = active_procs; 4837 redist = PETSC_TRUE; 4838 } 4839 } else { 4840 csin_ml = PETSC_FALSE; 4841 ncoarse_ml = all_procs; 4842 if (void_procs) { 4843 csin_ds = PETSC_TRUE; 4844 ncoarse_ds = void_procs; 4845 csin_type_simple = PETSC_FALSE; 4846 } else { 4847 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4848 csin_ds = PETSC_TRUE; 4849 ncoarse_ds = pcbddc->redistribute_coarse; 4850 redist = PETSC_TRUE; 4851 } else { 4852 csin_ds = PETSC_FALSE; 4853 ncoarse_ds = all_procs; 4854 } 4855 } 4856 } 4857 4858 /* 4859 test if we can go multilevel: three conditions must be satisfied: 4860 - we have not exceeded the number of levels requested 4861 - we can actually subassemble the active processes 4862 - we can find a suitable number of MPI processes where we can place the subassembled problem 4863 */ 4864 multilevel_allowed = PETSC_FALSE; 4865 multilevel_requested = PETSC_FALSE; 4866 if (pcbddc->current_level < pcbddc->max_levels) { 4867 multilevel_requested = PETSC_TRUE; 4868 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4869 multilevel_allowed = PETSC_FALSE; 4870 } else { 4871 multilevel_allowed = PETSC_TRUE; 4872 } 4873 } 4874 /* determine number of process partecipating to coarse solver */ 4875 if (multilevel_allowed) { 4876 ncoarse = ncoarse_ml; 4877 csin = csin_ml; 4878 redist = PETSC_FALSE; 4879 } else { 4880 ncoarse = ncoarse_ds; 4881 csin = csin_ds; 4882 } 4883 4884 /* creates temporary l2gmap and IS for coarse indexes */ 4885 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4886 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4887 4888 /* creates temporary MATIS object for coarse matrix */ 4889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4890 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4891 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4892 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4893 #if 0 4894 { 4895 PetscViewer viewer; 4896 char filename[256]; 4897 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4898 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4899 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4900 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4901 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4902 } 4903 #endif 4904 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4905 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4906 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4907 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4908 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4909 4910 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4911 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4912 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4913 const PetscInt *idxs; 4914 ISLocalToGlobalMapping tmap; 4915 4916 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4917 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4918 /* allocate space for temporary storage */ 4919 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4920 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4921 /* allocate for IS array */ 4922 nisdofs = pcbddc->n_ISForDofsLocal; 4923 nisneu = !!pcbddc->NeumannBoundariesLocal; 4924 nis = nisdofs + nisneu; 4925 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4926 /* dofs splitting */ 4927 for (i=0;i<nisdofs;i++) { 4928 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4929 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4930 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4931 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4932 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4933 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4934 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4935 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4936 } 4937 /* neumann boundaries */ 4938 if (pcbddc->NeumannBoundariesLocal) { 4939 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4940 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4941 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4942 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4943 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4944 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4945 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4946 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4947 } 4948 /* free memory */ 4949 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4950 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4951 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4952 } else { 4953 nis = 0; 4954 nisdofs = 0; 4955 nisneu = 0; 4956 isarray = NULL; 4957 } 4958 /* destroy no longer needed map */ 4959 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4960 4961 /* restrict on coarse candidates (if needed) */ 4962 coarse_mat_is = NULL; 4963 if (csin) { 4964 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4965 if (redist) { 4966 PetscMPIInt rank; 4967 PetscInt spc,n_spc_p1,dest[1],destsize; 4968 4969 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4970 spc = active_procs/ncoarse; 4971 n_spc_p1 = active_procs%ncoarse; 4972 if (im_active) { 4973 destsize = 1; 4974 if (rank > n_spc_p1*(spc+1)-1) { 4975 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4976 } else { 4977 dest[0] = rank/(spc+1); 4978 } 4979 } else { 4980 destsize = 0; 4981 } 4982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4983 } else if (csin_type_simple) { 4984 PetscMPIInt rank; 4985 PetscInt issize,isidx; 4986 4987 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4988 if (im_active) { 4989 issize = 1; 4990 isidx = (PetscInt)rank; 4991 } else { 4992 issize = 0; 4993 isidx = -1; 4994 } 4995 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4996 } else { /* get a suitable subassembling pattern from MATIS code */ 4997 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4998 } 4999 5000 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5001 if (!redist || ncoarse <= void_procs) { 5002 PetscInt ncoarse_cand,tissize,*nisindices; 5003 PetscInt *coarse_candidates; 5004 const PetscInt* tisindices; 5005 5006 /* get coarse candidates' ranks in pc communicator */ 5007 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5008 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5009 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5010 if (!coarse_candidates[i]) { 5011 coarse_candidates[ncoarse_cand++]=i; 5012 } 5013 } 5014 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5015 5016 5017 if (pcbddc->dbg_flag) { 5018 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5020 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5022 for (i=0;i<ncoarse_cand;i++) { 5023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5024 } 5025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5027 } 5028 /* shift the pattern on coarse candidates */ 5029 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5030 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5031 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5032 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5033 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5034 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5035 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5036 } 5037 if (pcbddc->dbg_flag) { 5038 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5039 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5040 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5041 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5042 } 5043 } 5044 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5045 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5046 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5047 } else { /* this is the last level, so use just receiving processes in subcomm */ 5048 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5049 } 5050 } else { 5051 if (pcbddc->dbg_flag) { 5052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5055 } 5056 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5057 coarse_mat_is = t_coarse_mat_is; 5058 } 5059 5060 /* create local to global scatters for coarse problem */ 5061 if (compute_vecs) { 5062 PetscInt lrows; 5063 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5064 if (coarse_mat_is) { 5065 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5066 } else { 5067 lrows = 0; 5068 } 5069 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5070 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5071 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5072 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5073 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5074 } 5075 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5076 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5077 5078 /* set defaults for coarse KSP and PC */ 5079 if (multilevel_allowed) { 5080 coarse_ksp_type = KSPRICHARDSON; 5081 coarse_pc_type = PCBDDC; 5082 } else { 5083 coarse_ksp_type = KSPPREONLY; 5084 coarse_pc_type = PCREDUNDANT; 5085 } 5086 5087 /* print some info if requested */ 5088 if (pcbddc->dbg_flag) { 5089 if (!multilevel_allowed) { 5090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5091 if (multilevel_requested) { 5092 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5093 } else if (pcbddc->max_levels) { 5094 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5095 } 5096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5097 } 5098 } 5099 5100 /* create the coarse KSP object only once with defaults */ 5101 if (coarse_mat_is) { 5102 MatReuse coarse_mat_reuse; 5103 PetscViewer dbg_viewer = NULL; 5104 if (pcbddc->dbg_flag) { 5105 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5106 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5107 } 5108 if (!pcbddc->coarse_ksp) { 5109 char prefix[256],str_level[16]; 5110 size_t len; 5111 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5112 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5113 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5114 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5115 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5116 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5117 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5118 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5119 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5120 /* prefix */ 5121 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5122 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5123 if (!pcbddc->current_level) { 5124 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5125 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5126 } else { 5127 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5128 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5129 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5130 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5131 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5132 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5133 } 5134 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5135 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5136 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5137 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5138 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5139 /* allow user customization */ 5140 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5141 } 5142 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5143 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5144 if (nisdofs) { 5145 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5146 for (i=0;i<nisdofs;i++) { 5147 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5148 } 5149 } 5150 if (nisneu) { 5151 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5152 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5153 } 5154 5155 /* get some info after set from options */ 5156 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5157 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5158 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5159 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5160 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5161 isbddc = PETSC_FALSE; 5162 } 5163 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5164 if (isredundant) { 5165 KSP inner_ksp; 5166 PC inner_pc; 5167 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5168 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5169 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5170 } 5171 5172 /* assemble coarse matrix */ 5173 if (coarse_reuse) { 5174 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5175 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5176 coarse_mat_reuse = MAT_REUSE_MATRIX; 5177 } else { 5178 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5179 } 5180 if (isbddc || isnn) { 5181 if (pcbddc->coarsening_ratio > 1) { 5182 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5183 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5184 if (pcbddc->dbg_flag) { 5185 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5186 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5187 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5188 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5189 } 5190 } 5191 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5192 } else { 5193 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5194 coarse_mat = coarse_mat_is; 5195 } 5196 } else { 5197 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5198 } 5199 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5200 5201 /* propagate symmetry info of coarse matrix */ 5202 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5203 if (pc->pmat->symmetric_set) { 5204 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5205 } 5206 if (pc->pmat->hermitian_set) { 5207 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5208 } 5209 if (pc->pmat->spd_set) { 5210 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5211 } 5212 /* set operators */ 5213 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5214 if (pcbddc->dbg_flag) { 5215 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5216 } 5217 } else { /* processes non partecipating to coarse solver (if any) */ 5218 coarse_mat = 0; 5219 } 5220 ierr = PetscFree(isarray);CHKERRQ(ierr); 5221 #if 0 5222 { 5223 PetscViewer viewer; 5224 char filename[256]; 5225 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5226 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5227 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5228 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5229 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5230 } 5231 #endif 5232 5233 /* Compute coarse null space (special handling by BDDC only) */ 5234 #if 0 5235 if (pcbddc->NullSpace) { 5236 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5237 } 5238 #endif 5239 /* hack */ 5240 if (pcbddc->coarse_ksp) { 5241 Vec crhs,csol; 5242 5243 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5244 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5245 if (!csol) { 5246 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5247 } 5248 if (!crhs) { 5249 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5250 } 5251 } 5252 5253 /* compute null space for coarse solver if the benign trick has been requested */ 5254 if (pcbddc->benign_null) { 5255 5256 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5257 for (i=0;i<pcbddc->benign_n;i++) { 5258 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5259 } 5260 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5261 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5262 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5263 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5264 if (coarse_mat) { 5265 Vec nullv; 5266 PetscScalar *array,*array2; 5267 PetscInt nl; 5268 5269 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5270 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5271 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5272 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5273 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5274 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5275 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5276 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5277 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5278 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5279 } 5280 } 5281 5282 if (pcbddc->coarse_ksp) { 5283 PetscBool ispreonly; 5284 5285 if (CoarseNullSpace) { 5286 PetscBool isnull; 5287 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5288 if (isnull) { 5289 if (isbddc) { 5290 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5291 } else { 5292 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5293 } 5294 } else { 5295 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5296 } 5297 } 5298 /* setup coarse ksp */ 5299 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5300 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5301 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5302 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5303 KSP check_ksp; 5304 KSPType check_ksp_type; 5305 PC check_pc; 5306 Vec check_vec,coarse_vec; 5307 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5308 PetscInt its; 5309 PetscBool compute_eigs; 5310 PetscReal *eigs_r,*eigs_c; 5311 PetscInt neigs; 5312 const char *prefix; 5313 5314 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5315 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5316 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5317 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5318 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5319 if (ispreonly) { 5320 check_ksp_type = KSPPREONLY; 5321 compute_eigs = PETSC_FALSE; 5322 } else { 5323 check_ksp_type = KSPGMRES; 5324 compute_eigs = PETSC_TRUE; 5325 } 5326 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5327 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5328 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5329 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5330 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5331 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5332 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5333 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5334 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5335 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5336 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5337 /* create random vec */ 5338 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5339 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5340 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5341 if (CoarseNullSpace) { 5342 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5343 } 5344 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5345 /* solve coarse problem */ 5346 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5347 if (CoarseNullSpace) { 5348 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5349 } 5350 /* set eigenvalue estimation if preonly has not been requested */ 5351 if (compute_eigs) { 5352 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5353 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5354 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5355 lambda_max = eigs_r[neigs-1]; 5356 lambda_min = eigs_r[0]; 5357 if (pcbddc->use_coarse_estimates) { 5358 if (lambda_max>lambda_min) { 5359 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5360 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5361 } 5362 } 5363 } 5364 5365 /* check coarse problem residual error */ 5366 if (pcbddc->dbg_flag) { 5367 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5368 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5369 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5370 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5371 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5372 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5373 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5374 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5375 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5376 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5377 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5378 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5379 if (CoarseNullSpace) { 5380 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5381 } 5382 if (compute_eigs) { 5383 PetscReal lambda_max_s,lambda_min_s; 5384 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5385 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5386 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5387 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5388 for (i=0;i<neigs;i++) { 5389 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5390 } 5391 } 5392 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5393 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5394 } 5395 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5396 if (compute_eigs) { 5397 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5398 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5399 } 5400 } 5401 } 5402 /* print additional info */ 5403 if (pcbddc->dbg_flag) { 5404 /* waits until all processes reaches this point */ 5405 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5408 } 5409 5410 /* free memory */ 5411 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5412 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5413 PetscFunctionReturn(0); 5414 } 5415 5416 #undef __FUNCT__ 5417 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5418 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5419 { 5420 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5421 PC_IS* pcis = (PC_IS*)pc->data; 5422 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5423 IS subset,subset_mult,subset_n; 5424 PetscInt local_size,coarse_size=0; 5425 PetscInt *local_primal_indices=NULL; 5426 const PetscInt *t_local_primal_indices; 5427 PetscErrorCode ierr; 5428 5429 PetscFunctionBegin; 5430 /* Compute global number of coarse dofs */ 5431 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5432 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5433 } 5434 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5435 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5436 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5437 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5438 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5439 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5440 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5441 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5442 if (local_size != pcbddc->local_primal_size) { 5443 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5444 } 5445 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5446 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5447 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5448 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5449 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5450 5451 /* check numbering */ 5452 if (pcbddc->dbg_flag) { 5453 PetscScalar coarsesum,*array,*array2; 5454 PetscInt i; 5455 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5456 5457 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5458 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5459 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5460 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5461 /* counter */ 5462 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5463 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5464 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5465 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5466 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5467 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5468 5469 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5470 for (i=0;i<pcbddc->local_primal_size;i++) { 5471 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5472 } 5473 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5474 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5475 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5476 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5477 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5478 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5479 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5480 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5481 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5482 for (i=0;i<pcis->n;i++) { 5483 if (array[i] != 0.0 && array[i] != array2[i]) { 5484 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5485 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5486 set_error = PETSC_TRUE; 5487 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5488 } 5489 } 5490 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5491 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5493 for (i=0;i<pcis->n;i++) { 5494 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5495 } 5496 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5497 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5498 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5499 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5500 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5502 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5503 PetscInt *gidxs; 5504 5505 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5506 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5507 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5510 for (i=0;i<pcbddc->local_primal_size;i++) { 5511 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5512 } 5513 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5514 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5515 } 5516 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5517 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5518 } 5519 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5520 /* get back data */ 5521 *coarse_size_n = coarse_size; 5522 *local_primal_indices_n = local_primal_indices; 5523 PetscFunctionReturn(0); 5524 } 5525 5526 #undef __FUNCT__ 5527 #define __FUNCT__ "PCBDDCGlobalToLocal" 5528 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5529 { 5530 IS localis_t; 5531 PetscInt i,lsize,*idxs,n; 5532 PetscScalar *vals; 5533 PetscErrorCode ierr; 5534 5535 PetscFunctionBegin; 5536 /* get indices in local ordering exploiting local to global map */ 5537 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5538 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5539 for (i=0;i<lsize;i++) vals[i] = 1.0; 5540 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5541 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5542 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5543 if (idxs) { /* multilevel guard */ 5544 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5545 } 5546 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5547 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5548 ierr = PetscFree(vals);CHKERRQ(ierr); 5549 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5550 /* now compute set in local ordering */ 5551 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5552 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5553 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5554 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5555 for (i=0,lsize=0;i<n;i++) { 5556 if (PetscRealPart(vals[i]) > 0.5) { 5557 lsize++; 5558 } 5559 } 5560 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5561 for (i=0,lsize=0;i<n;i++) { 5562 if (PetscRealPart(vals[i]) > 0.5) { 5563 idxs[lsize++] = i; 5564 } 5565 } 5566 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5567 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5568 *localis = localis_t; 5569 PetscFunctionReturn(0); 5570 } 5571 5572 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5573 #undef __FUNCT__ 5574 #define __FUNCT__ "PCBDDCMatMult_Private" 5575 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5576 { 5577 PCBDDCChange_ctx change_ctx; 5578 PetscErrorCode ierr; 5579 5580 PetscFunctionBegin; 5581 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5582 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5583 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5584 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5585 PetscFunctionReturn(0); 5586 } 5587 5588 #undef __FUNCT__ 5589 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5590 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5591 { 5592 PCBDDCChange_ctx change_ctx; 5593 PetscErrorCode ierr; 5594 5595 PetscFunctionBegin; 5596 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5597 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5598 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5599 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5600 PetscFunctionReturn(0); 5601 } 5602 5603 #undef __FUNCT__ 5604 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5605 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5606 { 5607 PC_IS *pcis=(PC_IS*)pc->data; 5608 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5609 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5610 Mat S_j; 5611 PetscInt *used_xadj,*used_adjncy; 5612 PetscBool free_used_adj; 5613 PetscErrorCode ierr; 5614 5615 PetscFunctionBegin; 5616 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5617 free_used_adj = PETSC_FALSE; 5618 if (pcbddc->sub_schurs_layers == -1) { 5619 used_xadj = NULL; 5620 used_adjncy = NULL; 5621 } else { 5622 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5623 used_xadj = pcbddc->mat_graph->xadj; 5624 used_adjncy = pcbddc->mat_graph->adjncy; 5625 } else if (pcbddc->computed_rowadj) { 5626 used_xadj = pcbddc->mat_graph->xadj; 5627 used_adjncy = pcbddc->mat_graph->adjncy; 5628 } else { 5629 PetscBool flg_row=PETSC_FALSE; 5630 const PetscInt *xadj,*adjncy; 5631 PetscInt nvtxs; 5632 5633 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5634 if (flg_row) { 5635 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5636 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5637 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5638 free_used_adj = PETSC_TRUE; 5639 } else { 5640 pcbddc->sub_schurs_layers = -1; 5641 used_xadj = NULL; 5642 used_adjncy = NULL; 5643 } 5644 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5645 } 5646 } 5647 5648 /* setup sub_schurs data */ 5649 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5650 if (!sub_schurs->use_mumps) { 5651 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5652 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5653 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5654 } else { 5655 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5656 PetscBool isseqaij; 5657 if (!pcbddc->use_vertices && reuse_solvers) { 5658 PetscInt n_vertices; 5659 5660 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5661 reuse_solvers = (PetscBool)!n_vertices; 5662 } 5663 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5664 if (!isseqaij) { 5665 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5666 if (matis->A == pcbddc->local_mat) { 5667 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5668 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5669 } else { 5670 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5671 } 5672 } 5673 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5674 } 5675 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5676 5677 /* free adjacency */ 5678 if (free_used_adj) { 5679 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5680 } 5681 PetscFunctionReturn(0); 5682 } 5683 5684 #undef __FUNCT__ 5685 #define __FUNCT__ "PCBDDCInitSubSchurs" 5686 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5687 { 5688 PC_IS *pcis=(PC_IS*)pc->data; 5689 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5690 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5691 PCBDDCGraph graph; 5692 PetscErrorCode ierr; 5693 5694 PetscFunctionBegin; 5695 /* attach interface graph for determining subsets */ 5696 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5697 IS verticesIS,verticescomm; 5698 PetscInt vsize,*idxs; 5699 5700 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5701 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5702 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5703 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5704 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5705 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5706 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5707 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5708 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5709 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5710 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5711 /* 5712 if (pcbddc->dbg_flag) { 5713 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5714 } 5715 */ 5716 } else { 5717 graph = pcbddc->mat_graph; 5718 } 5719 5720 /* sub_schurs init */ 5721 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5722 5723 /* free graph struct */ 5724 if (pcbddc->sub_schurs_rebuild) { 5725 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5726 } 5727 PetscFunctionReturn(0); 5728 } 5729 5730 #undef __FUNCT__ 5731 #define __FUNCT__ "PCBDDCCheckOperator" 5732 PetscErrorCode PCBDDCCheckOperator(PC pc) 5733 { 5734 PC_IS *pcis=(PC_IS*)pc->data; 5735 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5736 PetscErrorCode ierr; 5737 5738 PetscFunctionBegin; 5739 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5740 IS zerodiag = NULL; 5741 Mat S_j,B0_B=NULL; 5742 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5743 PetscScalar *p0_check,*array,*array2; 5744 PetscReal norm; 5745 PetscInt i; 5746 5747 /* B0 and B0_B */ 5748 if (zerodiag) { 5749 IS dummy; 5750 5751 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5752 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5753 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5754 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5755 } 5756 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5757 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5758 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5759 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5760 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5761 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5762 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5763 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5764 /* S_j */ 5765 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5766 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5767 5768 /* mimic vector in \widetilde{W}_\Gamma */ 5769 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5770 /* continuous in primal space */ 5771 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5772 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5773 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5774 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5775 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5776 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5777 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5778 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5779 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5780 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5781 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5782 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5783 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5784 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5785 5786 /* assemble rhs for coarse problem */ 5787 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5788 /* local with Schur */ 5789 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5790 if (zerodiag) { 5791 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5792 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5793 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5794 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5795 } 5796 /* sum on primal nodes the local contributions */ 5797 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5798 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5799 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5800 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5801 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5802 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5803 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5804 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5805 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5806 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5807 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5808 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5809 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5810 /* scale primal nodes (BDDC sums contibutions) */ 5811 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5812 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5813 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5814 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5815 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5816 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5817 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5818 /* global: \widetilde{B0}_B w_\Gamma */ 5819 if (zerodiag) { 5820 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5821 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5822 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5823 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5824 } 5825 /* BDDC */ 5826 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5827 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5828 5829 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5830 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5831 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5832 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5833 for (i=0;i<pcbddc->benign_n;i++) { 5834 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5835 } 5836 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5837 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5838 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5839 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5840 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5841 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5842 } 5843 PetscFunctionReturn(0); 5844 } 5845