1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 147 /* partial clean up */ 148 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 149 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 150 if (A != B) { 151 ierr = MatDestroy(&B);CHKERRQ(ierr); 152 } 153 154 /* get back data */ 155 if (ncc) *ncc = graph->ncc; 156 if (cc) { 157 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 158 for (i=0;i<graph->ncc;i++) { 159 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 160 } 161 *cc = cc_n; 162 } 163 /* clean up graph */ 164 graph->xadj = 0; 165 graph->adjncy = 0; 166 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "PCBDDCBenignCheck" 172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 173 { 174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 175 PC_IS* pcis = (PC_IS*)(pc->data); 176 IS dirIS = NULL; 177 PetscInt i; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 182 if (zerodiag) { 183 Mat A; 184 Vec vec3_N; 185 PetscScalar *vals; 186 const PetscInt *idxs; 187 PetscInt nz; 188 189 /* p0 */ 190 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 191 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 193 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 194 for (i=0;i<nz;i++) vals[i] = 1.; 195 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 198 /* v_I */ 199 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 200 for (i=0;i<nz;i++) vals[i] = 0.; 201 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 202 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 203 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 204 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 205 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 206 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 207 if (dirIS) { 208 PetscInt n; 209 210 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 211 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 212 for (i=0;i<n;i++) vals[i] = 0.; 213 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 214 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 215 } 216 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 218 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 219 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 220 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 221 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 222 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 223 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 224 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 225 } 226 ierr = PetscFree(vals);CHKERRQ(ierr); 227 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 228 } 229 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 230 231 /* check PCBDDCBenignGetOrSetP0 */ 232 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 235 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 236 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 237 for (i=0;i<pcbddc->benign_n;i++) { 238 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 239 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 240 } 241 } 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 248 { 249 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 250 IS pressures,zerodiag,*zerodiag_subs; 251 PetscInt nz,n; 252 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 257 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 258 pcbddc->benign_original_mat = pcbddc->local_mat; 259 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 260 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 261 Checks if all the pressure dofs in each subdomain have a zero diagonal 262 If not, a change of basis on pressures is not needed 263 since the local Schur complements are SPD 264 */ 265 has_null_pressures = PETSC_TRUE; 266 have_null = PETSC_TRUE; 267 if (pcbddc->n_ISForDofsLocal) { 268 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 269 270 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 271 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 272 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 273 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 274 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 275 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 276 if (!sorted) { 277 ierr = ISSort(pressures);CHKERRQ(ierr); 278 } 279 } else { 280 pressures = NULL; 281 } 282 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 283 /* TODO: add check for shared dofs and raise error */ 284 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 285 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 286 if (!sorted) { 287 ierr = ISSort(zerodiag);CHKERRQ(ierr); 288 } 289 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 290 if (!nz) { 291 if (n) have_null = PETSC_FALSE; 292 has_null_pressures = PETSC_FALSE; 293 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 294 } 295 recompute_zerodiag = PETSC_FALSE; 296 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 297 zerodiag_subs = NULL; 298 pcbddc->benign_n = 0; 299 if (has_null_pressures) { 300 IS *subs; 301 PetscInt nsubs,i; 302 303 subs = pcbddc->local_subs; 304 nsubs = pcbddc->n_local_subs; 305 if (nsubs > 1) { 306 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 307 for (i=0;i<nsubs;i++) { 308 ISLocalToGlobalMapping l2g; 309 IS t_zerodiag_subs; 310 PetscInt nl; 311 312 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 313 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 314 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 315 if (nl) { 316 PetscBool valid = PETSC_TRUE; 317 318 if (pressures) { 319 IS t_pressure_subs; 320 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 321 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 322 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 323 } 324 if (valid) { 325 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 326 pcbddc->benign_n++; 327 } else { 328 recompute_zerodiag = PETSC_TRUE; 329 } 330 } 331 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 332 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 333 } 334 } else { /* there's just one subdomain (or zero if they have not been detected */ 335 PetscBool valid = PETSC_TRUE; 336 if (pressures) { 337 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 338 } 339 if (valid) { 340 pcbddc->benign_n = 1; 341 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 342 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 343 zerodiag_subs[0] = zerodiag; 344 } 345 } 346 } 347 348 if (!pcbddc->benign_n) { 349 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 350 recompute_zerodiag = PETSC_FALSE; 351 has_null_pressures = PETSC_FALSE; 352 have_null = PETSC_FALSE; 353 } 354 355 /* final check for null pressures */ 356 if (zerodiag && pressures) { 357 PetscInt nz,np; 358 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 359 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 360 if (nz != np) have_null = PETSC_FALSE; 361 } 362 363 if (recompute_zerodiag) { 364 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 365 if (pcbddc->benign_n == 1) { 366 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 367 zerodiag = zerodiag_subs[0]; 368 } else { 369 PetscInt i,nzn,*new_idxs; 370 371 nzn = 0; 372 for (i=0;i<pcbddc->benign_n;i++) { 373 PetscInt ns; 374 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 375 nzn += ns; 376 } 377 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 378 nzn = 0; 379 for (i=0;i<pcbddc->benign_n;i++) { 380 PetscInt ns,*idxs; 381 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 382 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 383 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 nzn += ns; 386 } 387 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 388 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 389 } 390 have_null = PETSC_FALSE; 391 } 392 393 if (has_null_pressures) { 394 IS zerodiagc; 395 const PetscInt *idxs,*idxsc; 396 PetscInt i,s,*nnz; 397 Mat M; 398 399 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 400 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 401 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 402 /* local change of basis for pressures */ 403 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 404 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 405 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 406 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 407 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 408 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 409 for (i=0;i<pcbddc->benign_n;i++) { 410 PetscInt nzs,j; 411 412 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 413 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 414 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 415 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 416 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 417 } 418 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 419 ierr = PetscFree(nnz);CHKERRQ(ierr); 420 /* set identity on velocities */ 421 for (i=0;i<n-nz;i++) { 422 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 423 } 424 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 425 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 426 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 427 /* set change on pressures */ 428 for (s=0;s<pcbddc->benign_n;s++) { 429 PetscScalar *array; 430 PetscInt nzs; 431 432 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 433 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 434 for (i=0;i<nzs-1;i++) { 435 PetscScalar vals[2]; 436 PetscInt cols[2]; 437 438 cols[0] = idxs[i]; 439 cols[1] = idxs[nzs-1]; 440 vals[0] = 1.; 441 vals[1] = 1.; 442 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 443 } 444 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 445 for (i=0;i<nzs-1;i++) array[i] = -1.; 446 array[nzs-1] = 1.; 447 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 448 /* store local idxs for p0 */ 449 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 450 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 451 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 452 ierr = PetscFree(array);CHKERRQ(ierr); 453 } 454 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 /* TODO: need optimization? */ 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 459 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 460 ierr = MatDestroy(&M);CHKERRQ(ierr); 461 /* store global idxs for p0 */ 462 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 465 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 466 467 /* determines if the coarse solver will be singular or not */ 468 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 469 *zerodiaglocal = zerodiag; 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 476 { 477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 if (!pcbddc->benign_sf) { 482 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 483 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 484 } 485 if (get) { /* use SF to get values */ 486 PetscScalar *array; 487 488 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 489 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 490 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 493 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 503 { 504 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 /* TODO: add error checking 509 - avoid nested pop (or push) calls. 510 - cannot push before pop. 511 - cannot call this if pcbddc->local_mat is NULL 512 */ 513 if (!pcbddc->benign_n) { 514 PetscFunctionReturn(0); 515 } 516 if (pop) { 517 IS is_p0; 518 MatReuse reuse; 519 520 /* extract B_0 */ 521 reuse = MAT_INITIAL_MATRIX; 522 if (pcbddc->benign_B0) { 523 reuse = MAT_REUSE_MATRIX; 524 } 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 526 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 527 /* remove rows and cols from local problem */ 528 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 529 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 530 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 531 } else { /* push */ 532 PetscInt i; 533 534 for (i=0;i<pcbddc->benign_n;i++) { 535 PetscScalar *B0_vals; 536 PetscInt *B0_cols,B0_ncol; 537 538 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 539 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 540 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 541 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 542 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "PCBDDCAdaptiveSelection" 552 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 553 { 554 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 555 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 556 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 557 PetscBLASInt *B_iwork,*B_ifail; 558 PetscScalar *work,lwork; 559 PetscScalar *St,*S,*eigv; 560 PetscScalar *Sarray,*Starray; 561 PetscReal *eigs,thresh; 562 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 563 PetscBool allocated_S_St; 564 #if defined(PETSC_USE_COMPLEX) 565 PetscReal *rwork; 566 #endif 567 PetscErrorCode ierr; 568 569 PetscFunctionBegin; 570 if (!sub_schurs->use_mumps) { 571 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 572 } 573 574 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 575 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 576 } 577 578 if (pcbddc->dbg_flag) { 579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 583 } 584 585 if (pcbddc->dbg_flag) { 586 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 587 } 588 589 /* max size of subsets */ 590 mss = 0; 591 for (i=0;i<sub_schurs->n_subs;i++) { 592 PetscInt subset_size; 593 594 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 595 mss = PetscMax(mss,subset_size); 596 } 597 598 /* min/max and threshold */ 599 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 600 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 601 nmax = PetscMax(nmin,nmax); 602 allocated_S_St = PETSC_FALSE; 603 if (nmin) { 604 allocated_S_St = PETSC_TRUE; 605 } 606 607 /* allocate lapack workspace */ 608 cum = cum2 = 0; 609 maxneigs = 0; 610 for (i=0;i<sub_schurs->n_subs;i++) { 611 PetscInt n,subset_size; 612 613 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 614 n = PetscMin(subset_size,nmax); 615 cum += subset_size; 616 cum2 += subset_size*n; 617 maxneigs = PetscMax(maxneigs,n); 618 } 619 if (mss) { 620 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 621 PetscBLASInt B_itype = 1; 622 PetscBLASInt B_N = mss; 623 PetscReal zero = 0.0; 624 PetscReal eps = 0.0; /* dlamch? */ 625 626 B_lwork = -1; 627 S = NULL; 628 St = NULL; 629 eigs = NULL; 630 eigv = NULL; 631 B_iwork = NULL; 632 B_ifail = NULL; 633 #if defined(PETSC_USE_COMPLEX) 634 rwork = NULL; 635 #endif 636 thresh = 1.0; 637 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 638 #if defined(PETSC_USE_COMPLEX) 639 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 640 #else 641 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 642 #endif 643 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 644 ierr = PetscFPTrapPop();CHKERRQ(ierr); 645 } else { 646 /* TODO */ 647 } 648 } else { 649 lwork = 0; 650 } 651 652 nv = 0; 653 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 654 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 655 } 656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 657 if (allocated_S_St) { 658 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 659 } 660 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 661 #if defined(PETSC_USE_COMPLEX) 662 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 663 #endif 664 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 665 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 666 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 667 nv+cum,&pcbddc->adaptive_constraints_idxs, 668 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 669 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 670 671 maxneigs = 0; 672 cum = cum2 = cumarray = 0; 673 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 674 pcbddc->adaptive_constraints_data_ptr[0] = 0; 675 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 676 const PetscInt *idxs; 677 678 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 679 for (cum=0;cum<nv;cum++) { 680 pcbddc->adaptive_constraints_n[cum] = 1; 681 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 682 pcbddc->adaptive_constraints_data[cum] = 1.0; 683 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 684 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 685 } 686 cum2 = cum; 687 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 688 } 689 690 if (mss) { /* multilevel */ 691 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 692 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 693 } 694 695 for (i=0;i<sub_schurs->n_subs;i++) { 696 697 const PetscInt *idxs; 698 PetscReal infty = PETSC_MAX_REAL; 699 PetscInt j,subset_size,eigs_start = 0; 700 PetscBLASInt B_N; 701 PetscBool same_data = PETSC_FALSE; 702 703 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 704 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 705 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 706 if (sub_schurs->is_hermitian) { 707 PetscInt j,k; 708 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 709 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 710 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 711 } 712 for (j=0;j<subset_size;j++) { 713 for (k=j;k<subset_size;k++) { 714 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 715 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 716 } 717 } 718 } else { 719 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 720 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 721 } 722 } else { 723 S = Sarray + cumarray; 724 St = Starray + cumarray; 725 } 726 727 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 728 /* see if we can save some work */ 729 if (sub_schurs->n_subs == 1) { 730 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 731 } 732 733 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 734 B_neigs = 0; 735 } else { 736 /* Threshold: this is an heuristic for edges */ 737 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 738 if (nmin) { 739 Mat SM,StM; 740 PetscInt j,k,nccs,nccst; 741 742 for (j=0;j<subset_size;j++) { 743 for (k=j;k<subset_size;k++) { 744 S [k*subset_size+j] = S [j*subset_size+k]; 745 St[k*subset_size+j] = St[j*subset_size+k]; 746 } 747 } 748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr); 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr); 750 ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr); 751 ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr); 752 if (nccs != 1 || nccst != 1) { 753 PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size); 754 } 755 ierr = MatDestroy(&SM);CHKERRQ(ierr); 756 ierr = MatDestroy(&StM);CHKERRQ(ierr); 757 } 758 759 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 760 PetscBLASInt B_itype = 1; 761 PetscBLASInt B_IL, B_IU; 762 PetscReal eps = -1.0; /* dlamch? */ 763 PetscInt nmin_s; 764 765 if (pcbddc->dbg_flag) { 766 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 767 } 768 769 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 770 if (thresh > 1.+PETSC_SMALL) { 771 772 /* ask for eigenvalues larger than thresh */ 773 #if defined(PETSC_USE_COMPLEX) 774 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 775 #else 776 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 777 #endif 778 } else { 779 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 780 B_IL = 1; 781 #if defined(PETSC_USE_COMPLEX) 782 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 783 #else 784 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 785 #endif 786 } 787 ierr = PetscFPTrapPop();CHKERRQ(ierr); 788 if (B_ierr) { 789 if (B_ierr < 0 ) { 790 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 791 } else if (B_ierr <= B_N) { 792 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 793 } else { 794 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 795 } 796 } 797 798 if (B_neigs > nmax) { 799 if (pcbddc->dbg_flag) { 800 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 801 } 802 eigs_start = B_neigs -nmax; 803 B_neigs = nmax; 804 } 805 806 nmin_s = PetscMin(nmin,B_N); 807 if (B_neigs < nmin_s) { 808 PetscBLASInt B_neigs2; 809 810 B_IU = B_N - B_neigs; 811 B_IL = B_N - nmin_s + 1; 812 if (pcbddc->dbg_flag) { 813 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 814 } 815 if (sub_schurs->is_hermitian) { 816 PetscInt j; 817 for (j=0;j<subset_size;j++) { 818 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 819 } 820 for (j=0;j<subset_size;j++) { 821 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 822 } 823 } else { 824 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 825 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 826 } 827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 828 #if defined(PETSC_USE_COMPLEX) 829 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 830 #else 831 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 832 #endif 833 ierr = PetscFPTrapPop();CHKERRQ(ierr); 834 B_neigs += B_neigs2; 835 } 836 if (B_ierr) { 837 if (B_ierr < 0 ) { 838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 839 } else if (B_ierr <= B_N) { 840 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 841 } else { 842 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 843 } 844 } 845 if (pcbddc->dbg_flag) { 846 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 847 for (j=0;j<B_neigs;j++) { 848 if (eigs[j] == 0.0) { 849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 850 } else { 851 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 852 } 853 } 854 } 855 } else { 856 /* TODO */ 857 } 858 } 859 maxneigs = PetscMax(B_neigs,maxneigs); 860 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 861 if (B_neigs) { 862 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 863 864 if (pcbddc->dbg_flag > 1) { 865 PetscInt ii; 866 for (ii=0;ii<B_neigs;ii++) { 867 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 868 for (j=0;j<B_N;j++) { 869 #if defined(PETSC_USE_COMPLEX) 870 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 871 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 873 #else 874 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 875 #endif 876 } 877 } 878 } 879 #if 0 880 for (j=0;j<B_neigs;j++) { 881 PetscBLASInt Blas_N,Blas_one = 1.0; 882 PetscScalar norm; 883 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 884 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 885 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 886 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 887 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 888 } else { 889 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 890 } 891 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 892 } 893 #endif 894 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 895 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 896 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 897 cum++; 898 } 899 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 900 /* shift for next computation */ 901 cumarray += subset_size*subset_size; 902 } 903 if (pcbddc->dbg_flag) { 904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 905 } 906 907 if (mss) { 908 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 909 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 910 /* destroy matrices (junk) */ 911 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 912 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 913 } 914 if (allocated_S_St) { 915 ierr = PetscFree2(S,St);CHKERRQ(ierr); 916 } 917 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 918 #if defined(PETSC_USE_COMPLEX) 919 ierr = PetscFree(rwork);CHKERRQ(ierr); 920 #endif 921 if (pcbddc->dbg_flag) { 922 PetscInt maxneigs_r; 923 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 924 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 925 } 926 PetscFunctionReturn(0); 927 } 928 929 #undef __FUNCT__ 930 #define __FUNCT__ "PCBDDCSetUpSolvers" 931 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 932 { 933 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 934 PetscScalar *coarse_submat_vals; 935 PetscErrorCode ierr; 936 937 PetscFunctionBegin; 938 /* Setup local scatters R_to_B and (optionally) R_to_D */ 939 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 940 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 941 942 /* Setup local neumann solver ksp_R */ 943 /* PCBDDCSetUpLocalScatters should be called first! */ 944 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 945 946 /* Change global null space passed in by the user if change of basis has been requested */ 947 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 948 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 949 } 950 951 /* 952 Setup local correction and local part of coarse basis. 953 Gives back the dense local part of the coarse matrix in column major ordering 954 */ 955 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 956 957 /* Compute total number of coarse nodes and setup coarse solver */ 958 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 959 960 /* free */ 961 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "PCBDDCResetCustomization" 967 PetscErrorCode PCBDDCResetCustomization(PC pc) 968 { 969 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 974 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 975 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 976 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 977 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 978 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 979 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 980 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 981 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 982 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 983 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCBDDCResetTopography" 989 PetscErrorCode PCBDDCResetTopography(PC pc) 990 { 991 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 992 PetscInt i; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 997 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 998 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 999 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1000 for (i=0;i<pcbddc->n_local_subs;i++) { 1001 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1002 } 1003 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1004 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "PCBDDCResetSolvers" 1010 PetscErrorCode PCBDDCResetSolvers(PC pc) 1011 { 1012 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1013 PetscScalar *array; 1014 PetscErrorCode ierr; 1015 1016 PetscFunctionBegin; 1017 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1018 if (pcbddc->coarse_phi_B) { 1019 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1020 ierr = PetscFree(array);CHKERRQ(ierr); 1021 } 1022 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1023 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1024 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1025 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1026 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1027 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1028 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1029 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1030 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1031 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1032 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1033 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1034 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1035 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1036 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1037 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1038 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1039 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1040 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1041 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1042 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1043 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1044 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1045 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1046 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1047 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1048 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1049 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1050 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1051 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1057 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1058 { 1059 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1060 PC_IS *pcis = (PC_IS*)pc->data; 1061 VecType impVecType; 1062 PetscInt n_constraints,n_R,old_size; 1063 PetscErrorCode ierr; 1064 1065 PetscFunctionBegin; 1066 if (!pcbddc->ConstraintMatrix) { 1067 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1068 } 1069 /* get sizes */ 1070 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1071 n_R = pcis->n - pcbddc->n_vertices; 1072 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1073 /* local work vectors (try to avoid unneeded work)*/ 1074 /* R nodes */ 1075 old_size = -1; 1076 if (pcbddc->vec1_R) { 1077 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1078 } 1079 if (n_R != old_size) { 1080 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1081 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1082 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1083 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1084 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1085 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1086 } 1087 /* local primal dofs */ 1088 old_size = -1; 1089 if (pcbddc->vec1_P) { 1090 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1091 } 1092 if (pcbddc->local_primal_size != old_size) { 1093 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1094 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1095 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1096 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1097 } 1098 /* local explicit constraints */ 1099 old_size = -1; 1100 if (pcbddc->vec1_C) { 1101 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1102 } 1103 if (n_constraints && n_constraints != old_size) { 1104 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1105 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1106 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1107 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1108 } 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "PCBDDCSetUpCorrection" 1114 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1115 { 1116 PetscErrorCode ierr; 1117 /* pointers to pcis and pcbddc */ 1118 PC_IS* pcis = (PC_IS*)pc->data; 1119 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1120 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1121 /* submatrices of local problem */ 1122 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1123 /* submatrices of benign trick */ 1124 Mat B0_V = NULL; 1125 /* submatrices of local coarse problem */ 1126 Mat S_VV,S_CV,S_VC,S_CC; 1127 /* working matrices */ 1128 Mat C_CR; 1129 /* additional working stuff */ 1130 PC pc_R; 1131 Mat F; 1132 PetscBool isLU,isCHOL,isILU; 1133 1134 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1135 PetscScalar *work; 1136 PetscInt *idx_V_B; 1137 PetscInt n,n_vertices,n_constraints,*p0_lidx_I; 1138 PetscInt i,n_R,n_D,n_B; 1139 PetscBool unsymmetric_check; 1140 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 1141 MatType impMatType; 1142 /* some shortcuts to scalars */ 1143 PetscScalar one=1.0,m_one=-1.0; 1144 1145 PetscFunctionBegin; 1146 n_vertices = pcbddc->n_vertices; 1147 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1148 /* Set Non-overlapping dimensions */ 1149 n_B = pcis->n_B; 1150 n_D = pcis->n - n_B; 1151 n_R = pcis->n - n_vertices; 1152 1153 /* Set types for local objects needed by BDDC precondtioner */ 1154 impMatType = MATSEQDENSE; 1155 1156 /* vertices in boundary numbering */ 1157 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1158 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1159 if (i != n_vertices) { 1160 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1161 } 1162 1163 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1164 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1165 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1166 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1167 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1168 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1170 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1171 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1172 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1173 1174 unsymmetric_check = PETSC_FALSE; 1175 /* allocate workspace */ 1176 n = 0; 1177 if (n_constraints) { 1178 n += n_R*n_constraints; 1179 } 1180 if (n_vertices) { 1181 n = PetscMax(2*n_R*n_vertices,n); 1182 n = PetscMax((n_R+n_B)*n_vertices,n); 1183 } 1184 if (!pcbddc->symmetric_primal) { 1185 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 1186 unsymmetric_check = PETSC_TRUE; 1187 } 1188 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1189 1190 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1191 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1192 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1193 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1194 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1195 if (isLU || isILU || isCHOL) { 1196 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1197 } else if (sub_schurs->reuse_mumps) { 1198 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1199 MatFactorType type; 1200 1201 F = reuse_mumps->F; 1202 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1203 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1204 } else { 1205 F = NULL; 1206 } 1207 1208 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1209 if (n_constraints) { 1210 Mat M1,M2,M3; 1211 Mat auxmat; 1212 IS is_aux; 1213 PetscScalar *array,*array2; 1214 1215 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1216 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1217 1218 /* Extract constraints on R nodes: C_{CR} */ 1219 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1220 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1221 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1222 1223 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1224 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1225 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1226 for (i=0;i<n_constraints;i++) { 1227 const PetscScalar *row_cmat_values; 1228 const PetscInt *row_cmat_indices; 1229 PetscInt size_of_constraint,j; 1230 1231 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1232 for (j=0;j<size_of_constraint;j++) { 1233 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 1234 } 1235 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1236 } 1237 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1238 if (F) { 1239 Mat B; 1240 1241 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1242 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1243 ierr = MatDestroy(&B);CHKERRQ(ierr); 1244 } else { 1245 PetscScalar *marr; 1246 1247 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1248 for (i=0;i<n_constraints;i++) { 1249 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1250 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 1251 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1252 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1253 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1254 } 1255 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1256 } 1257 if (!pcbddc->switch_static) { 1258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1259 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1260 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1261 for (i=0;i<n_constraints;i++) { 1262 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 1263 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1264 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1266 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1267 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1268 } 1269 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1270 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1271 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1272 } else { 1273 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1274 pcbddc->local_auxmat2 = local_auxmat2_R; 1275 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1276 } 1277 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1278 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1279 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1280 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1281 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1282 if (isCHOL) { 1283 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1284 } else { 1285 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1286 } 1287 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1288 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1289 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1290 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1291 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1292 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1293 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1294 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1295 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1296 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1297 } 1298 /* Get submatrices from subdomain matrix */ 1299 if (pcbddc->benign_n) { 1300 IS dummy; 1301 Mat B0_R; 1302 PetscReal norm; 1303 1304 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1305 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1306 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1307 if (norm > PETSC_SMALL) { 1308 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1309 } 1310 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1311 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1312 } 1313 1314 if (n_vertices) { 1315 IS is_aux; 1316 1317 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1318 IS tis; 1319 1320 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1321 ierr = ISSort(tis);CHKERRQ(ierr); 1322 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1323 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1324 } else { 1325 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1326 } 1327 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1328 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1329 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1330 if (pcbddc->benign_n) { 1331 IS dummy; 1332 1333 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1334 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1335 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1336 } 1337 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1338 } 1339 1340 /* Matrix of coarse basis functions (local) */ 1341 if (pcbddc->coarse_phi_B) { 1342 PetscInt on_B,on_primal,on_D=n_D; 1343 if (pcbddc->coarse_phi_D) { 1344 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1345 } 1346 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1347 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1348 PetscScalar *marray; 1349 1350 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1351 ierr = PetscFree(marray);CHKERRQ(ierr); 1352 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1353 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1354 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1355 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1356 } 1357 } 1358 1359 if (!pcbddc->coarse_phi_B) { 1360 PetscScalar *marray; 1361 1362 n = n_B*pcbddc->local_primal_size; 1363 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1364 n += n_D*pcbddc->local_primal_size; 1365 } 1366 if (!pcbddc->symmetric_primal) { 1367 n *= 2; 1368 } 1369 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1370 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1371 n = n_B*pcbddc->local_primal_size; 1372 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1373 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1374 n += n_D*pcbddc->local_primal_size; 1375 } 1376 if (!pcbddc->symmetric_primal) { 1377 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1378 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1379 n = n_B*pcbddc->local_primal_size; 1380 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1381 } 1382 } else { 1383 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1384 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1385 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1386 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1387 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1388 } 1389 } 1390 } 1391 1392 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1393 p0_lidx_I = NULL; 1394 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1395 const PetscInt *idxs; 1396 1397 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1398 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1399 for (i=0;i<pcbddc->benign_n;i++) { 1400 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1401 } 1402 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1403 } 1404 1405 /* vertices */ 1406 if (n_vertices) { 1407 1408 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1409 1410 if (n_R) { 1411 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1412 PetscBLASInt B_N,B_one = 1; 1413 PetscScalar *x,*y; 1414 PetscBool isseqaij; 1415 1416 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1417 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1418 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1419 if (F) { /* TODO could be optimized for symmetric problems */ 1420 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1421 } else { 1422 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1423 for (i=0;i<n_vertices;i++) { 1424 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1425 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1426 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1427 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1428 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1429 } 1430 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1431 } 1432 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1433 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1434 if (n_constraints) { 1435 Mat B; 1436 1437 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1438 for (i=0;i<n_vertices;i++) { 1439 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1440 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1441 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1443 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1444 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1445 } 1446 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1447 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1448 ierr = MatDestroy(&B);CHKERRQ(ierr); 1449 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1450 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1451 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1452 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1453 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1454 ierr = MatDestroy(&B);CHKERRQ(ierr); 1455 } 1456 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1457 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1458 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1459 } 1460 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1461 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1462 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1463 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1464 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1465 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1466 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1467 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1468 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1469 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1470 } else { 1471 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1472 } 1473 for (i=0;i<pcbddc->benign_n;i++) { 1474 const PetscScalar *vals; 1475 const PetscInt *idxs; 1476 PetscInt n,j,primal_idx; 1477 1478 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1479 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1480 for (j=0;j<n;j++) { 1481 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1482 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1483 } 1484 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1485 } 1486 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1487 1488 /* coarse basis functions */ 1489 for (i=0;i<n_vertices;i++) { 1490 PetscScalar *y; 1491 1492 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1493 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1494 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 y[n_B*i+idx_V_B[i]] = 1.0; 1498 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1499 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1500 1501 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1502 PetscInt j; 1503 1504 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1505 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1506 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1509 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1510 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1511 } 1512 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1513 } 1514 /* if n_R == 0 the object is not destroyed */ 1515 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1516 } 1517 1518 if (n_constraints) { 1519 Mat B; 1520 1521 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1522 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1523 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1524 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1525 if (n_vertices) { 1526 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1527 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1528 } else { 1529 Mat S_VCt; 1530 1531 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1532 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1533 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1534 } 1535 } 1536 ierr = MatDestroy(&B);CHKERRQ(ierr); 1537 /* coarse basis functions */ 1538 for (i=0;i<n_constraints;i++) { 1539 PetscScalar *y; 1540 1541 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1542 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1543 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1544 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1547 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1548 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1549 PetscInt j; 1550 1551 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1552 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1553 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1556 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1557 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1558 } 1559 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1560 } 1561 } 1562 if (n_constraints) { 1563 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1564 } 1565 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1566 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1567 1568 /* compute other basis functions for non-symmetric problems */ 1569 if (!pcbddc->symmetric_primal) { 1570 1571 if (n_constraints) { 1572 Mat S_CCT,B_C; 1573 1574 /* this is a lazy thing */ 1575 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1576 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1577 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1578 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1579 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1580 if (n_vertices) { 1581 Mat B_V,S_VCT; 1582 1583 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1584 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1585 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1586 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1587 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1588 } 1589 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1590 } else { /* if there are no constraints, reset work */ 1591 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1592 } 1593 if (n_vertices && n_R) { 1594 Mat A_VRT; 1595 PetscScalar *marray; 1596 PetscBLASInt B_N,B_one = 1; 1597 1598 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1599 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1600 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1601 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1602 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1603 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1604 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1605 } 1606 1607 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1608 for (i=0;i<pcbddc->local_primal_size;i++) { 1609 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1610 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1611 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1612 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1613 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1614 } 1615 } else { 1616 for (i=0;i<pcbddc->local_primal_size;i++) { 1617 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1618 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1619 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1620 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1621 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1622 } 1623 } 1624 /* coarse basis functions */ 1625 for (i=0;i<pcbddc->local_primal_size;i++) { 1626 PetscScalar *y; 1627 1628 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1629 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1630 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1631 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1633 if (i<n_vertices) { 1634 y[n_B*i+idx_V_B[i]] = 1.0; 1635 } 1636 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1637 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1638 1639 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1640 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1641 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1642 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1645 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1646 } 1647 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1648 } 1649 } 1650 /* free memory */ 1651 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1652 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1653 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1654 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1655 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1656 ierr = PetscFree(work);CHKERRQ(ierr); 1657 if (n_vertices) { 1658 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1659 } 1660 if (n_constraints) { 1661 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1662 } 1663 /* Checking coarse_sub_mat and coarse basis functios */ 1664 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1665 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1666 if (pcbddc->dbg_flag) { 1667 Mat coarse_sub_mat; 1668 Mat AUXMAT,TM1,TM2,TM3,TM4; 1669 Mat coarse_phi_D,coarse_phi_B; 1670 Mat coarse_psi_D,coarse_psi_B; 1671 Mat A_II,A_BB,A_IB,A_BI; 1672 Mat C_B,CPHI; 1673 IS is_dummy; 1674 Vec mones; 1675 MatType checkmattype=MATSEQAIJ; 1676 PetscReal real_value; 1677 1678 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1679 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1680 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1681 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1682 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1683 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1684 if (unsymmetric_check) { 1685 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1686 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1687 } 1688 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1689 1690 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1691 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 if (unsymmetric_check) { 1694 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1695 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1696 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1697 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1698 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1699 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1700 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1701 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1702 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1703 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1704 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1705 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1706 } else { 1707 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1708 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1709 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1710 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1711 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1712 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1713 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1714 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1715 } 1716 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1717 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1718 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1719 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1720 if (pcbddc->benign_n) { 1721 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1722 PetscScalar *data,*data2; 1723 PetscInt j; 1724 1725 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1726 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1727 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1728 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1729 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1730 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1731 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1732 for (j=0;j<pcbddc->benign_n;j++) { 1733 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1734 for (i=0;i<pcbddc->local_primal_size;i++) { 1735 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1736 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1737 } 1738 } 1739 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1740 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1741 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1742 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1743 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1744 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1745 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1746 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1747 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1748 } 1749 #if 0 1750 { 1751 PetscViewer viewer; 1752 char filename[256]; 1753 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1754 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1755 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1756 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1757 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1758 } 1759 #endif 1760 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1761 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1762 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1764 1765 /* check constraints */ 1766 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1767 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1768 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1769 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1770 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1771 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1772 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1773 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1774 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1775 if (unsymmetric_check) { 1776 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1777 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1778 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1779 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1781 } 1782 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1783 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1784 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1785 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1786 } 1787 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1788 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1789 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1790 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1791 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1792 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1793 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1794 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1795 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1796 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1797 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1798 if (unsymmetric_check) { 1799 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1800 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1801 } 1802 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1803 } 1804 /* get back data */ 1805 *coarse_submat_vals_n = coarse_submat_vals; 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1811 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1812 { 1813 Mat *work_mat; 1814 IS isrow_s,iscol_s; 1815 PetscBool rsorted,csorted; 1816 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1817 PetscErrorCode ierr; 1818 1819 PetscFunctionBegin; 1820 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1821 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1822 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1823 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1824 1825 if (!rsorted) { 1826 const PetscInt *idxs; 1827 PetscInt *idxs_sorted,i; 1828 1829 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1830 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1831 for (i=0;i<rsize;i++) { 1832 idxs_perm_r[i] = i; 1833 } 1834 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1835 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1836 for (i=0;i<rsize;i++) { 1837 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1838 } 1839 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1840 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1841 } else { 1842 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1843 isrow_s = isrow; 1844 } 1845 1846 if (!csorted) { 1847 if (isrow == iscol) { 1848 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1849 iscol_s = isrow_s; 1850 } else { 1851 const PetscInt *idxs; 1852 PetscInt *idxs_sorted,i; 1853 1854 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1855 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1856 for (i=0;i<csize;i++) { 1857 idxs_perm_c[i] = i; 1858 } 1859 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1860 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1861 for (i=0;i<csize;i++) { 1862 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1863 } 1864 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1865 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1866 } 1867 } else { 1868 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1869 iscol_s = iscol; 1870 } 1871 1872 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1873 1874 if (!rsorted || !csorted) { 1875 Mat new_mat; 1876 IS is_perm_r,is_perm_c; 1877 1878 if (!rsorted) { 1879 PetscInt *idxs_r,i; 1880 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1881 for (i=0;i<rsize;i++) { 1882 idxs_r[idxs_perm_r[i]] = i; 1883 } 1884 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1885 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1886 } else { 1887 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1888 } 1889 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1890 1891 if (!csorted) { 1892 if (isrow_s == iscol_s) { 1893 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1894 is_perm_c = is_perm_r; 1895 } else { 1896 PetscInt *idxs_c,i; 1897 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1898 for (i=0;i<csize;i++) { 1899 idxs_c[idxs_perm_c[i]] = i; 1900 } 1901 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1902 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1903 } 1904 } else { 1905 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1906 } 1907 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1908 1909 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1910 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1911 work_mat[0] = new_mat; 1912 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1913 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1914 } 1915 1916 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1917 *B = work_mat[0]; 1918 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1919 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1920 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1921 PetscFunctionReturn(0); 1922 } 1923 1924 #undef __FUNCT__ 1925 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1926 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1927 { 1928 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1929 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1930 Mat new_mat; 1931 IS is_local,is_global; 1932 PetscInt local_size; 1933 PetscBool isseqaij; 1934 PetscErrorCode ierr; 1935 1936 PetscFunctionBegin; 1937 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1938 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1939 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1940 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1941 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1942 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1943 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1944 1945 /* check */ 1946 if (pcbddc->dbg_flag) { 1947 Vec x,x_change; 1948 PetscReal error; 1949 1950 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1951 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1952 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1953 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1954 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1955 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1956 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1957 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1958 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1959 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1960 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1961 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1962 ierr = VecDestroy(&x);CHKERRQ(ierr); 1963 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1964 } 1965 1966 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1967 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1968 if (isseqaij) { 1969 Mat M; 1970 1971 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1972 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1973 ierr = MatDestroy(&M);CHKERRQ(ierr); 1974 } else { 1975 Mat work_mat,M; 1976 1977 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1978 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 1979 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 1980 ierr = MatDestroy(&M);CHKERRQ(ierr); 1981 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1982 } 1983 if (matis->A->symmetric_set) { 1984 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1985 #if !defined(PETSC_USE_COMPLEX) 1986 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1987 #endif 1988 } 1989 /* 1990 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1991 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1992 */ 1993 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1999 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2000 { 2001 PC_IS* pcis = (PC_IS*)(pc->data); 2002 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2003 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2004 PetscInt *idx_R_local=NULL; 2005 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2006 PetscInt vbs,bs; 2007 PetscBT bitmask=NULL; 2008 PetscErrorCode ierr; 2009 2010 PetscFunctionBegin; 2011 /* 2012 No need to setup local scatters if 2013 - primal space is unchanged 2014 AND 2015 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2016 AND 2017 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2018 */ 2019 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2020 PetscFunctionReturn(0); 2021 } 2022 /* destroy old objects */ 2023 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2024 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2025 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2026 /* Set Non-overlapping dimensions */ 2027 n_B = pcis->n_B; 2028 n_D = pcis->n - n_B; 2029 n_vertices = pcbddc->n_vertices; 2030 2031 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2032 2033 /* create auxiliary bitmask and allocate workspace */ 2034 if (!sub_schurs->reuse_mumps) { 2035 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2036 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2037 for (i=0;i<n_vertices;i++) { 2038 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2039 } 2040 2041 for (i=0, n_R=0; i<pcis->n; i++) { 2042 if (!PetscBTLookup(bitmask,i)) { 2043 idx_R_local[n_R++] = i; 2044 } 2045 } 2046 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2047 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2048 2049 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2050 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2051 } 2052 2053 /* Block code */ 2054 vbs = 1; 2055 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2056 if (bs>1 && !(n_vertices%bs)) { 2057 PetscBool is_blocked = PETSC_TRUE; 2058 PetscInt *vary; 2059 if (!sub_schurs->reuse_mumps) { 2060 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2061 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2062 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2063 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2064 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2065 for (i=0; i<pcis->n/bs; i++) { 2066 if (vary[i]!=0 && vary[i]!=bs) { 2067 is_blocked = PETSC_FALSE; 2068 break; 2069 } 2070 } 2071 ierr = PetscFree(vary);CHKERRQ(ierr); 2072 } else { 2073 /* Verify directly the R set */ 2074 for (i=0; i<n_R/bs; i++) { 2075 PetscInt j,node=idx_R_local[bs*i]; 2076 for (j=1; j<bs; j++) { 2077 if (node != idx_R_local[bs*i+j]-j) { 2078 is_blocked = PETSC_FALSE; 2079 break; 2080 } 2081 } 2082 } 2083 } 2084 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2085 vbs = bs; 2086 for (i=0;i<n_R/vbs;i++) { 2087 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2088 } 2089 } 2090 } 2091 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2092 if (sub_schurs->reuse_mumps) { 2093 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2094 2095 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2096 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2097 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2098 reuse_mumps->is_R = pcbddc->is_R_local; 2099 } else { 2100 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2101 } 2102 2103 /* print some info if requested */ 2104 if (pcbddc->dbg_flag) { 2105 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2106 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2107 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2108 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2109 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2110 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2111 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2112 } 2113 2114 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2115 if (!sub_schurs->reuse_mumps) { 2116 IS is_aux1,is_aux2; 2117 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2118 2119 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2120 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2121 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2122 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2123 for (i=0; i<n_D; i++) { 2124 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2125 } 2126 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2127 for (i=0, j=0; i<n_R; i++) { 2128 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2129 aux_array1[j++] = i; 2130 } 2131 } 2132 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2133 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2134 for (i=0, j=0; i<n_B; i++) { 2135 if (!PetscBTLookup(bitmask,is_indices[i])) { 2136 aux_array2[j++] = i; 2137 } 2138 } 2139 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2140 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2141 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2142 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2143 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2144 2145 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2146 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2147 for (i=0, j=0; i<n_R; i++) { 2148 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2149 aux_array1[j++] = i; 2150 } 2151 } 2152 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2153 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2154 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2155 } 2156 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2157 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2158 } else { 2159 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2160 IS tis; 2161 PetscInt schur_size; 2162 2163 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2164 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2165 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2166 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2167 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2168 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2169 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2170 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2171 } 2172 } 2173 PetscFunctionReturn(0); 2174 } 2175 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2179 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2180 { 2181 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2182 PC_IS *pcis = (PC_IS*)pc->data; 2183 PC pc_temp; 2184 Mat A_RR; 2185 MatReuse reuse; 2186 PetscScalar m_one = -1.0; 2187 PetscReal value; 2188 PetscInt n_D,n_R; 2189 PetscBool use_exact,use_exact_reduced,issbaij; 2190 PetscErrorCode ierr; 2191 /* prefixes stuff */ 2192 char dir_prefix[256],neu_prefix[256],str_level[16]; 2193 size_t len; 2194 2195 PetscFunctionBegin; 2196 2197 /* compute prefixes */ 2198 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2199 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2200 if (!pcbddc->current_level) { 2201 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2202 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2203 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2204 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2205 } else { 2206 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2207 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2208 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2209 len -= 15; /* remove "pc_bddc_coarse_" */ 2210 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2211 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2212 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2213 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2214 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2215 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2216 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2217 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2218 } 2219 2220 /* DIRICHLET PROBLEM */ 2221 if (dirichlet) { 2222 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2223 if (pcbddc->local_mat->symmetric_set) { 2224 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2225 } 2226 /* Matrix for Dirichlet problem is pcis->A_II */ 2227 n_D = pcis->n - pcis->n_B; 2228 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2229 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2230 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2231 /* default */ 2232 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2233 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2234 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2235 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2236 if (issbaij) { 2237 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2238 } else { 2239 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2240 } 2241 /* Allow user's customization */ 2242 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2243 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2244 } 2245 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2246 if (sub_schurs->reuse_mumps) { 2247 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2248 2249 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2250 } 2251 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2252 if (!n_D) { 2253 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2254 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2255 } 2256 /* Set Up KSP for Dirichlet problem of BDDC */ 2257 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2258 /* set ksp_D into pcis data */ 2259 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2260 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2261 pcis->ksp_D = pcbddc->ksp_D; 2262 } 2263 2264 /* NEUMANN PROBLEM */ 2265 A_RR = 0; 2266 if (neumann) { 2267 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2268 PetscInt ibs,mbs; 2269 PetscBool issbaij; 2270 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2271 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2272 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2273 if (pcbddc->ksp_R) { /* already created ksp */ 2274 PetscInt nn_R; 2275 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2276 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2277 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2278 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2279 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2280 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2281 reuse = MAT_INITIAL_MATRIX; 2282 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2283 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2284 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2285 reuse = MAT_INITIAL_MATRIX; 2286 } else { /* safe to reuse the matrix */ 2287 reuse = MAT_REUSE_MATRIX; 2288 } 2289 } 2290 /* last check */ 2291 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2292 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2293 reuse = MAT_INITIAL_MATRIX; 2294 } 2295 } else { /* first time, so we need to create the matrix */ 2296 reuse = MAT_INITIAL_MATRIX; 2297 } 2298 /* extract A_RR */ 2299 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2300 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2301 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2302 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2303 if (matis->A == pcbddc->local_mat) { 2304 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2305 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2306 } else { 2307 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2308 } 2309 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2310 if (matis->A == pcbddc->local_mat) { 2311 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2312 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2313 } else { 2314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2315 } 2316 } 2317 if (!sub_schurs->reuse_mumps) { 2318 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2319 if (pcbddc->local_mat->symmetric_set) { 2320 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2321 } 2322 } else { 2323 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2324 2325 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2326 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2327 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2328 } 2329 if (!pcbddc->ksp_R) { /* create object if not present */ 2330 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2331 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2332 /* default */ 2333 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2334 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2335 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2336 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2337 if (issbaij) { 2338 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2339 } else { 2340 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2341 } 2342 /* Allow user's customization */ 2343 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2344 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2345 } 2346 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2347 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2348 if (!n_R) { 2349 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2350 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2351 } 2352 /* Reuse MUMPS solver if it is present */ 2353 if (sub_schurs->reuse_mumps) { 2354 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2355 2356 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2357 } 2358 /* Set Up KSP for Neumann problem of BDDC */ 2359 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2360 } 2361 /* free Neumann problem's matrix */ 2362 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2363 2364 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2365 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2366 if (pcbddc->dbg_flag) { 2367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2368 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2369 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2370 } 2371 if (dirichlet) { /* Dirichlet */ 2372 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2373 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2374 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2375 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2376 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2377 /* need to be adapted? */ 2378 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2379 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2380 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2381 /* print info */ 2382 if (pcbddc->dbg_flag) { 2383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2384 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2385 } 2386 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2387 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2388 } 2389 } 2390 if (neumann) { /* Neumann */ 2391 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2392 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2393 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2394 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2395 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2396 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2397 /* need to be adapted? */ 2398 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2399 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2400 /* print info */ 2401 if (pcbddc->dbg_flag) { 2402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2404 } 2405 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2406 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2407 } 2408 } 2409 } 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2415 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2416 { 2417 PetscErrorCode ierr; 2418 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2419 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2420 2421 PetscFunctionBegin; 2422 if (!sub_schurs->reuse_mumps) { 2423 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2424 } 2425 if (!pcbddc->switch_static) { 2426 if (applytranspose && pcbddc->local_auxmat1) { 2427 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2428 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2429 } 2430 if (!sub_schurs->reuse_mumps) { 2431 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2432 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2433 } else { 2434 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2435 2436 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2437 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2438 } 2439 } else { 2440 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2441 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2442 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2443 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2444 if (applytranspose && pcbddc->local_auxmat1) { 2445 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2446 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2447 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2448 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2449 } 2450 } 2451 if (!sub_schurs->reuse_mumps) { 2452 if (applytranspose) { 2453 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2454 } else { 2455 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2456 } 2457 #if defined(PETSC_HAVE_MUMPS) 2458 } else { 2459 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2460 2461 if (applytranspose) { 2462 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2463 } else { 2464 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2465 } 2466 #endif 2467 } 2468 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2469 if (!pcbddc->switch_static) { 2470 if (!sub_schurs->reuse_mumps) { 2471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2473 } else { 2474 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2475 2476 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2477 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2478 } 2479 if (!applytranspose && pcbddc->local_auxmat1) { 2480 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2481 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2482 } 2483 } else { 2484 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2485 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2486 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2487 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2488 if (!applytranspose && pcbddc->local_auxmat1) { 2489 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2490 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2491 } 2492 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2493 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2494 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2495 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 } 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2501 #undef __FUNCT__ 2502 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2503 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2504 { 2505 PetscErrorCode ierr; 2506 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2507 PC_IS* pcis = (PC_IS*) (pc->data); 2508 const PetscScalar zero = 0.0; 2509 2510 PetscFunctionBegin; 2511 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2512 if (applytranspose) { 2513 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2514 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2515 } else { 2516 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2517 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2518 } 2519 2520 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2521 if (pcbddc->benign_n) { 2522 PetscScalar *array; 2523 PetscInt j; 2524 2525 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2526 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2527 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2528 } 2529 2530 /* start communications from local primal nodes to rhs of coarse solver */ 2531 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2532 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2533 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2534 2535 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2536 /* TODO remove null space when doing multilevel */ 2537 if (pcbddc->coarse_ksp) { 2538 Mat coarse_mat; 2539 Vec rhs,sol; 2540 MatNullSpace nullsp; 2541 2542 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2543 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2544 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2545 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2546 if (nullsp) { 2547 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2548 } 2549 if (applytranspose) { 2550 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2551 } else { 2552 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2553 } 2554 if (nullsp) { 2555 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2556 } 2557 } 2558 2559 /* Local solution on R nodes */ 2560 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2561 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2562 } 2563 2564 /* communications from coarse sol to local primal nodes */ 2565 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2566 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2567 2568 /* Sum contributions from two levels */ 2569 if (applytranspose) { 2570 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2571 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2572 } else { 2573 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2574 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2575 } 2576 /* store p0 */ 2577 if (pcbddc->benign_n) { 2578 PetscScalar *array; 2579 PetscInt j; 2580 2581 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2582 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2583 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2584 } 2585 PetscFunctionReturn(0); 2586 } 2587 2588 #undef __FUNCT__ 2589 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2590 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2591 { 2592 PetscErrorCode ierr; 2593 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2594 PetscScalar *array; 2595 Vec from,to; 2596 2597 PetscFunctionBegin; 2598 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2599 from = pcbddc->coarse_vec; 2600 to = pcbddc->vec1_P; 2601 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2602 Vec tvec; 2603 2604 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2605 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2606 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2607 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2608 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2609 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2610 } 2611 } else { /* from local to global -> put data in coarse right hand side */ 2612 from = pcbddc->vec1_P; 2613 to = pcbddc->coarse_vec; 2614 } 2615 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2621 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2622 { 2623 PetscErrorCode ierr; 2624 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2625 PetscScalar *array; 2626 Vec from,to; 2627 2628 PetscFunctionBegin; 2629 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2630 from = pcbddc->coarse_vec; 2631 to = pcbddc->vec1_P; 2632 } else { /* from local to global -> put data in coarse right hand side */ 2633 from = pcbddc->vec1_P; 2634 to = pcbddc->coarse_vec; 2635 } 2636 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2637 if (smode == SCATTER_FORWARD) { 2638 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2639 Vec tvec; 2640 2641 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2642 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2643 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2644 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2645 } 2646 } else { 2647 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2648 ierr = VecResetArray(from);CHKERRQ(ierr); 2649 } 2650 } 2651 PetscFunctionReturn(0); 2652 } 2653 2654 /* uncomment for testing purposes */ 2655 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2656 #undef __FUNCT__ 2657 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2658 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2659 { 2660 PetscErrorCode ierr; 2661 PC_IS* pcis = (PC_IS*)(pc->data); 2662 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2663 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2664 /* one and zero */ 2665 PetscScalar one=1.0,zero=0.0; 2666 /* space to store constraints and their local indices */ 2667 PetscScalar *constraints_data; 2668 PetscInt *constraints_idxs,*constraints_idxs_B; 2669 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2670 PetscInt *constraints_n; 2671 /* iterators */ 2672 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2673 /* BLAS integers */ 2674 PetscBLASInt lwork,lierr; 2675 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2676 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2677 /* reuse */ 2678 PetscInt olocal_primal_size,olocal_primal_size_cc; 2679 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2680 /* change of basis */ 2681 PetscBool qr_needed; 2682 PetscBT change_basis,qr_needed_idx; 2683 /* auxiliary stuff */ 2684 PetscInt *nnz,*is_indices; 2685 PetscInt ncc; 2686 /* some quantities */ 2687 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2688 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2689 2690 PetscFunctionBegin; 2691 /* Destroy Mat objects computed previously */ 2692 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2693 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2694 /* save info on constraints from previous setup (if any) */ 2695 olocal_primal_size = pcbddc->local_primal_size; 2696 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2697 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2698 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2699 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2700 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2701 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2702 2703 /* print some info */ 2704 if (pcbddc->dbg_flag) { 2705 IS vertices; 2706 PetscInt nv,nedges,nfaces; 2707 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2708 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2709 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2710 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2715 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2716 } 2717 2718 if (!pcbddc->adaptive_selection) { 2719 IS ISForVertices,*ISForFaces,*ISForEdges; 2720 MatNullSpace nearnullsp; 2721 const Vec *nearnullvecs; 2722 Vec *localnearnullsp; 2723 PetscScalar *array; 2724 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2725 PetscBool nnsp_has_cnst; 2726 /* LAPACK working arrays for SVD or POD */ 2727 PetscBool skip_lapack,boolforchange; 2728 PetscScalar *work; 2729 PetscReal *singular_vals; 2730 #if defined(PETSC_USE_COMPLEX) 2731 PetscReal *rwork; 2732 #endif 2733 #if defined(PETSC_MISSING_LAPACK_GESVD) 2734 PetscScalar *temp_basis,*correlation_mat; 2735 #else 2736 PetscBLASInt dummy_int=1; 2737 PetscScalar dummy_scalar=1.; 2738 #endif 2739 2740 /* Get index sets for faces, edges and vertices from graph */ 2741 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2742 /* free unneeded index sets */ 2743 if (!pcbddc->use_vertices) { 2744 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2745 } 2746 if (!pcbddc->use_edges) { 2747 for (i=0;i<n_ISForEdges;i++) { 2748 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2749 } 2750 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2751 n_ISForEdges = 0; 2752 } 2753 if (!pcbddc->use_faces) { 2754 for (i=0;i<n_ISForFaces;i++) { 2755 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2756 } 2757 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2758 n_ISForFaces = 0; 2759 } 2760 2761 #if defined(PETSC_USE_DEBUG) 2762 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2763 Also use_change_of_basis should be consistent among processors */ 2764 if (pcbddc->NullSpace) { 2765 PetscBool tbool[2],gbool[2]; 2766 2767 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2768 pcbddc->use_change_of_basis = PETSC_TRUE; 2769 if (!ISForEdges) { 2770 pcbddc->use_change_on_faces = PETSC_TRUE; 2771 } 2772 } 2773 tbool[0] = pcbddc->use_change_of_basis; 2774 tbool[1] = pcbddc->use_change_on_faces; 2775 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2776 pcbddc->use_change_of_basis = gbool[0]; 2777 pcbddc->use_change_on_faces = gbool[1]; 2778 } 2779 #endif 2780 2781 /* check if near null space is attached to global mat */ 2782 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2783 if (nearnullsp) { 2784 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2785 /* remove any stored info */ 2786 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2787 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2788 /* store information for BDDC solver reuse */ 2789 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2790 pcbddc->onearnullspace = nearnullsp; 2791 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2792 for (i=0;i<nnsp_size;i++) { 2793 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2794 } 2795 } else { /* if near null space is not provided BDDC uses constants by default */ 2796 nnsp_size = 0; 2797 nnsp_has_cnst = PETSC_TRUE; 2798 } 2799 /* get max number of constraints on a single cc */ 2800 max_constraints = nnsp_size; 2801 if (nnsp_has_cnst) max_constraints++; 2802 2803 /* 2804 Evaluate maximum storage size needed by the procedure 2805 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2806 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2807 There can be multiple constraints per connected component 2808 */ 2809 n_vertices = 0; 2810 if (ISForVertices) { 2811 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2812 } 2813 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2814 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2815 2816 total_counts = n_ISForFaces+n_ISForEdges; 2817 total_counts *= max_constraints; 2818 total_counts += n_vertices; 2819 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2820 2821 total_counts = 0; 2822 max_size_of_constraint = 0; 2823 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2824 IS used_is; 2825 if (i<n_ISForEdges) { 2826 used_is = ISForEdges[i]; 2827 } else { 2828 used_is = ISForFaces[i-n_ISForEdges]; 2829 } 2830 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2831 total_counts += j; 2832 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2833 } 2834 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2835 2836 /* get local part of global near null space vectors */ 2837 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2838 for (k=0;k<nnsp_size;k++) { 2839 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2840 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2841 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2842 } 2843 2844 /* whether or not to skip lapack calls */ 2845 skip_lapack = PETSC_TRUE; 2846 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2847 2848 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2849 if (!skip_lapack) { 2850 PetscScalar temp_work; 2851 2852 #if defined(PETSC_MISSING_LAPACK_GESVD) 2853 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2854 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2855 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2856 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2857 #if defined(PETSC_USE_COMPLEX) 2858 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2859 #endif 2860 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2861 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2862 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2863 lwork = -1; 2864 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2865 #if !defined(PETSC_USE_COMPLEX) 2866 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2867 #else 2868 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2869 #endif 2870 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2871 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2872 #else /* on missing GESVD */ 2873 /* SVD */ 2874 PetscInt max_n,min_n; 2875 max_n = max_size_of_constraint; 2876 min_n = max_constraints; 2877 if (max_size_of_constraint < max_constraints) { 2878 min_n = max_size_of_constraint; 2879 max_n = max_constraints; 2880 } 2881 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2882 #if defined(PETSC_USE_COMPLEX) 2883 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2884 #endif 2885 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2886 lwork = -1; 2887 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2888 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2889 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2890 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2891 #if !defined(PETSC_USE_COMPLEX) 2892 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2893 #else 2894 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2895 #endif 2896 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2897 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2898 #endif /* on missing GESVD */ 2899 /* Allocate optimal workspace */ 2900 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2901 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2902 } 2903 /* Now we can loop on constraining sets */ 2904 total_counts = 0; 2905 constraints_idxs_ptr[0] = 0; 2906 constraints_data_ptr[0] = 0; 2907 /* vertices */ 2908 if (n_vertices) { 2909 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2910 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2911 for (i=0;i<n_vertices;i++) { 2912 constraints_n[total_counts] = 1; 2913 constraints_data[total_counts] = 1.0; 2914 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2915 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2916 total_counts++; 2917 } 2918 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2919 n_vertices = total_counts; 2920 } 2921 2922 /* edges and faces */ 2923 total_counts_cc = total_counts; 2924 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2925 IS used_is; 2926 PetscBool idxs_copied = PETSC_FALSE; 2927 2928 if (ncc<n_ISForEdges) { 2929 used_is = ISForEdges[ncc]; 2930 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2931 } else { 2932 used_is = ISForFaces[ncc-n_ISForEdges]; 2933 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2934 } 2935 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2936 2937 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2938 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2939 /* change of basis should not be performed on local periodic nodes */ 2940 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2941 if (nnsp_has_cnst) { 2942 PetscScalar quad_value; 2943 2944 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2945 idxs_copied = PETSC_TRUE; 2946 2947 if (!pcbddc->use_nnsp_true) { 2948 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2949 } else { 2950 quad_value = 1.0; 2951 } 2952 for (j=0;j<size_of_constraint;j++) { 2953 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2954 } 2955 temp_constraints++; 2956 total_counts++; 2957 } 2958 for (k=0;k<nnsp_size;k++) { 2959 PetscReal real_value; 2960 PetscScalar *ptr_to_data; 2961 2962 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2963 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2964 for (j=0;j<size_of_constraint;j++) { 2965 ptr_to_data[j] = array[is_indices[j]]; 2966 } 2967 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2968 /* check if array is null on the connected component */ 2969 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2970 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2971 if (real_value > 0.0) { /* keep indices and values */ 2972 temp_constraints++; 2973 total_counts++; 2974 if (!idxs_copied) { 2975 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2976 idxs_copied = PETSC_TRUE; 2977 } 2978 } 2979 } 2980 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2981 valid_constraints = temp_constraints; 2982 if (!pcbddc->use_nnsp_true && temp_constraints) { 2983 if (temp_constraints == 1) { /* just normalize the constraint */ 2984 PetscScalar norm,*ptr_to_data; 2985 2986 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2987 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2988 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2989 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2990 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2991 } else { /* perform SVD */ 2992 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2993 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2994 2995 #if defined(PETSC_MISSING_LAPACK_GESVD) 2996 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2997 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2998 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2999 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3000 from that computed using LAPACKgesvd 3001 -> This is due to a different computation of eigenvectors in LAPACKheev 3002 -> The quality of the POD-computed basis will be the same */ 3003 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3004 /* Store upper triangular part of correlation matrix */ 3005 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3006 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3007 for (j=0;j<temp_constraints;j++) { 3008 for (k=0;k<j+1;k++) { 3009 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3010 } 3011 } 3012 /* compute eigenvalues and eigenvectors of correlation matrix */ 3013 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3014 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3015 #if !defined(PETSC_USE_COMPLEX) 3016 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3017 #else 3018 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3019 #endif 3020 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3021 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3022 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3023 j = 0; 3024 while (j < temp_constraints && singular_vals[j] < tol) j++; 3025 total_counts = total_counts-j; 3026 valid_constraints = temp_constraints-j; 3027 /* scale and copy POD basis into used quadrature memory */ 3028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3029 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3030 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3032 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3033 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3034 if (j<temp_constraints) { 3035 PetscInt ii; 3036 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3037 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3038 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3039 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3040 for (k=0;k<temp_constraints-j;k++) { 3041 for (ii=0;ii<size_of_constraint;ii++) { 3042 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3043 } 3044 } 3045 } 3046 #else /* on missing GESVD */ 3047 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3048 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3049 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3050 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3051 #if !defined(PETSC_USE_COMPLEX) 3052 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3053 #else 3054 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3055 #endif 3056 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3057 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3058 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3059 k = temp_constraints; 3060 if (k > size_of_constraint) k = size_of_constraint; 3061 j = 0; 3062 while (j < k && singular_vals[k-j-1] < tol) j++; 3063 valid_constraints = k-j; 3064 total_counts = total_counts-temp_constraints+valid_constraints; 3065 #endif /* on missing GESVD */ 3066 } 3067 } 3068 /* update pointers information */ 3069 if (valid_constraints) { 3070 constraints_n[total_counts_cc] = valid_constraints; 3071 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3072 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3073 /* set change_of_basis flag */ 3074 if (boolforchange) { 3075 PetscBTSet(change_basis,total_counts_cc); 3076 } 3077 total_counts_cc++; 3078 } 3079 } 3080 /* free workspace */ 3081 if (!skip_lapack) { 3082 ierr = PetscFree(work);CHKERRQ(ierr); 3083 #if defined(PETSC_USE_COMPLEX) 3084 ierr = PetscFree(rwork);CHKERRQ(ierr); 3085 #endif 3086 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3087 #if defined(PETSC_MISSING_LAPACK_GESVD) 3088 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3089 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3090 #endif 3091 } 3092 for (k=0;k<nnsp_size;k++) { 3093 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3094 } 3095 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3096 /* free index sets of faces, edges and vertices */ 3097 for (i=0;i<n_ISForFaces;i++) { 3098 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3099 } 3100 if (n_ISForFaces) { 3101 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3102 } 3103 for (i=0;i<n_ISForEdges;i++) { 3104 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3105 } 3106 if (n_ISForEdges) { 3107 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3108 } 3109 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3110 } else { 3111 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3112 3113 total_counts = 0; 3114 n_vertices = 0; 3115 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3116 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3117 } 3118 max_constraints = 0; 3119 total_counts_cc = 0; 3120 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3121 total_counts += pcbddc->adaptive_constraints_n[i]; 3122 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3123 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3124 } 3125 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3126 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3127 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3128 constraints_data = pcbddc->adaptive_constraints_data; 3129 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3130 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3131 total_counts_cc = 0; 3132 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3133 if (pcbddc->adaptive_constraints_n[i]) { 3134 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3135 } 3136 } 3137 #if 0 3138 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3139 for (i=0;i<total_counts_cc;i++) { 3140 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3141 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3142 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3143 printf(" %d",constraints_idxs[j]); 3144 } 3145 printf("\n"); 3146 printf("number of cc: %d\n",constraints_n[i]); 3147 } 3148 for (i=0;i<n_vertices;i++) { 3149 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3150 } 3151 for (i=0;i<sub_schurs->n_subs;i++) { 3152 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3153 } 3154 #endif 3155 3156 max_size_of_constraint = 0; 3157 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3158 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3159 /* Change of basis */ 3160 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3161 if (pcbddc->use_change_of_basis) { 3162 for (i=0;i<sub_schurs->n_subs;i++) { 3163 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3164 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3165 } 3166 } 3167 } 3168 } 3169 pcbddc->local_primal_size = total_counts; 3170 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3171 3172 /* map constraints_idxs in boundary numbering */ 3173 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3174 if (i != constraints_idxs_ptr[total_counts_cc]) { 3175 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3176 } 3177 3178 /* Create constraint matrix */ 3179 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3180 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3181 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3182 3183 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3184 /* determine if a QR strategy is needed for change of basis */ 3185 qr_needed = PETSC_FALSE; 3186 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3187 total_primal_vertices=0; 3188 pcbddc->local_primal_size_cc = 0; 3189 for (i=0;i<total_counts_cc;i++) { 3190 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3191 if (size_of_constraint == 1) { 3192 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3193 pcbddc->local_primal_size_cc += 1; 3194 } else if (PetscBTLookup(change_basis,i)) { 3195 for (k=0;k<constraints_n[i];k++) { 3196 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3197 } 3198 pcbddc->local_primal_size_cc += constraints_n[i]; 3199 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3200 PetscBTSet(qr_needed_idx,i); 3201 qr_needed = PETSC_TRUE; 3202 } 3203 } else { 3204 pcbddc->local_primal_size_cc += 1; 3205 } 3206 } 3207 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3208 pcbddc->n_vertices = total_primal_vertices; 3209 /* permute indices in order to have a sorted set of vertices */ 3210 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3211 3212 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3213 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3214 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3215 3216 /* nonzero structure of constraint matrix */ 3217 /* and get reference dof for local constraints */ 3218 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3219 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3220 3221 j = total_primal_vertices; 3222 total_counts = total_primal_vertices; 3223 cum = total_primal_vertices; 3224 for (i=n_vertices;i<total_counts_cc;i++) { 3225 if (!PetscBTLookup(change_basis,i)) { 3226 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3227 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3228 cum++; 3229 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3230 for (k=0;k<constraints_n[i];k++) { 3231 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3232 nnz[j+k] = size_of_constraint; 3233 } 3234 j += constraints_n[i]; 3235 } 3236 } 3237 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3238 ierr = PetscFree(nnz);CHKERRQ(ierr); 3239 3240 /* set values in constraint matrix */ 3241 for (i=0;i<total_primal_vertices;i++) { 3242 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3243 } 3244 total_counts = total_primal_vertices; 3245 for (i=n_vertices;i<total_counts_cc;i++) { 3246 if (!PetscBTLookup(change_basis,i)) { 3247 PetscInt *cols; 3248 3249 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3250 cols = constraints_idxs+constraints_idxs_ptr[i]; 3251 for (k=0;k<constraints_n[i];k++) { 3252 PetscInt row = total_counts+k; 3253 PetscScalar *vals; 3254 3255 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3256 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3257 } 3258 total_counts += constraints_n[i]; 3259 } 3260 } 3261 /* assembling */ 3262 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 3265 /* 3266 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3267 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3268 */ 3269 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3270 if (pcbddc->use_change_of_basis) { 3271 /* dual and primal dofs on a single cc */ 3272 PetscInt dual_dofs,primal_dofs; 3273 /* working stuff for GEQRF */ 3274 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3275 PetscBLASInt lqr_work; 3276 /* working stuff for UNGQR */ 3277 PetscScalar *gqr_work,lgqr_work_t; 3278 PetscBLASInt lgqr_work; 3279 /* working stuff for TRTRS */ 3280 PetscScalar *trs_rhs; 3281 PetscBLASInt Blas_NRHS; 3282 /* pointers for values insertion into change of basis matrix */ 3283 PetscInt *start_rows,*start_cols; 3284 PetscScalar *start_vals; 3285 /* working stuff for values insertion */ 3286 PetscBT is_primal; 3287 PetscInt *aux_primal_numbering_B; 3288 /* matrix sizes */ 3289 PetscInt global_size,local_size; 3290 /* temporary change of basis */ 3291 Mat localChangeOfBasisMatrix; 3292 /* extra space for debugging */ 3293 PetscScalar *dbg_work; 3294 3295 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3296 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3297 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3298 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3299 /* nonzeros for local mat */ 3300 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3301 for (i=0;i<pcis->n;i++) nnz[i]=1; 3302 for (i=n_vertices;i<total_counts_cc;i++) { 3303 if (PetscBTLookup(change_basis,i)) { 3304 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3305 if (PetscBTLookup(qr_needed_idx,i)) { 3306 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3307 } else { 3308 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3309 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3310 } 3311 } 3312 } 3313 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3314 ierr = PetscFree(nnz);CHKERRQ(ierr); 3315 /* Set initial identity in the matrix */ 3316 for (i=0;i<pcis->n;i++) { 3317 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3318 } 3319 3320 if (pcbddc->dbg_flag) { 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3323 } 3324 3325 3326 /* Now we loop on the constraints which need a change of basis */ 3327 /* 3328 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3329 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3330 3331 Basic blocks of change of basis matrix T computed by 3332 3333 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3334 3335 | 1 0 ... 0 s_1/S | 3336 | 0 1 ... 0 s_2/S | 3337 | ... | 3338 | 0 ... 1 s_{n-1}/S | 3339 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3340 3341 with S = \sum_{i=1}^n s_i^2 3342 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3343 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3344 3345 - QR decomposition of constraints otherwise 3346 */ 3347 if (qr_needed) { 3348 /* space to store Q */ 3349 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3350 /* first we issue queries for optimal work */ 3351 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3352 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3353 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3354 lqr_work = -1; 3355 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3356 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3357 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3358 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3359 lgqr_work = -1; 3360 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3361 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3362 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3363 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3364 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3365 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3366 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3367 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3368 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3369 /* array to store scaling factors for reflectors */ 3370 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3371 /* array to store rhs and solution of triangular solver */ 3372 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3373 /* allocating workspace for check */ 3374 if (pcbddc->dbg_flag) { 3375 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3376 } 3377 } 3378 /* array to store whether a node is primal or not */ 3379 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3380 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3381 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3382 if (i != total_primal_vertices) { 3383 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3384 } 3385 for (i=0;i<total_primal_vertices;i++) { 3386 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3387 } 3388 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3389 3390 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3391 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3392 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3393 if (PetscBTLookup(change_basis,total_counts)) { 3394 /* get constraint info */ 3395 primal_dofs = constraints_n[total_counts]; 3396 dual_dofs = size_of_constraint-primal_dofs; 3397 3398 if (pcbddc->dbg_flag) { 3399 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3400 } 3401 3402 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3403 3404 /* copy quadrature constraints for change of basis check */ 3405 if (pcbddc->dbg_flag) { 3406 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3407 } 3408 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3409 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3410 3411 /* compute QR decomposition of constraints */ 3412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3413 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3414 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3415 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3416 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3417 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3418 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3419 3420 /* explictly compute R^-T */ 3421 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3422 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3423 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3424 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3425 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3426 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3427 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3428 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3429 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3430 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3431 3432 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3433 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3434 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3435 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3437 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3438 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3439 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3440 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3441 3442 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3443 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3444 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3445 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3446 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3447 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3450 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3451 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3452 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3453 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3454 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3455 3456 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3457 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3458 /* insert cols for primal dofs */ 3459 for (j=0;j<primal_dofs;j++) { 3460 start_vals = &qr_basis[j*size_of_constraint]; 3461 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3462 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3463 } 3464 /* insert cols for dual dofs */ 3465 for (j=0,k=0;j<dual_dofs;k++) { 3466 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3467 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3468 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3469 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3470 j++; 3471 } 3472 } 3473 3474 /* check change of basis */ 3475 if (pcbddc->dbg_flag) { 3476 PetscInt ii,jj; 3477 PetscBool valid_qr=PETSC_TRUE; 3478 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3479 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3480 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3483 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3484 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3485 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3486 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3487 for (jj=0;jj<size_of_constraint;jj++) { 3488 for (ii=0;ii<primal_dofs;ii++) { 3489 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3490 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3491 } 3492 } 3493 if (!valid_qr) { 3494 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3495 for (jj=0;jj<size_of_constraint;jj++) { 3496 for (ii=0;ii<primal_dofs;ii++) { 3497 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3498 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3499 } 3500 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3501 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3502 } 3503 } 3504 } 3505 } else { 3506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3507 } 3508 } 3509 } else { /* simple transformation block */ 3510 PetscInt row,col; 3511 PetscScalar val,norm; 3512 3513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3514 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3515 for (j=0;j<size_of_constraint;j++) { 3516 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3517 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3518 if (!PetscBTLookup(is_primal,row_B)) { 3519 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3520 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3521 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3522 } else { 3523 for (k=0;k<size_of_constraint;k++) { 3524 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3525 if (row != col) { 3526 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3527 } else { 3528 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3529 } 3530 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3531 } 3532 } 3533 } 3534 if (pcbddc->dbg_flag) { 3535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3536 } 3537 } 3538 } else { 3539 if (pcbddc->dbg_flag) { 3540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3541 } 3542 } 3543 } 3544 3545 /* free workspace */ 3546 if (qr_needed) { 3547 if (pcbddc->dbg_flag) { 3548 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3549 } 3550 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3551 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3552 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3553 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3554 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3555 } 3556 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3557 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3558 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3559 3560 /* assembling of global change of variable */ 3561 { 3562 Mat tmat; 3563 PetscInt bs; 3564 3565 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3566 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3567 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3568 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3569 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3570 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3571 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3572 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3573 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3574 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3575 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3576 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3577 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3578 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3579 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3580 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3581 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3582 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3583 } 3584 /* check */ 3585 if (pcbddc->dbg_flag) { 3586 PetscReal error; 3587 Vec x,x_change; 3588 3589 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3590 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3591 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3592 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3593 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3594 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3595 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3596 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3597 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3598 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3599 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3600 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3602 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3603 ierr = VecDestroy(&x);CHKERRQ(ierr); 3604 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3605 } 3606 3607 /* adapt sub_schurs computed (if any) */ 3608 if (pcbddc->use_deluxe_scaling) { 3609 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3610 if (sub_schurs->S_Ej_all) { 3611 Mat S_new,tmat; 3612 ISLocalToGlobalMapping NtoSall; 3613 IS is_all_N,is_V,is_V_Sall; 3614 const PetscScalar *array; 3615 const PetscInt *idxs_V,*idxs_all; 3616 PetscInt i,n_V; 3617 3618 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3619 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3620 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3621 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3622 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3623 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3624 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3625 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3626 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3627 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3628 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3629 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3630 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3631 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3632 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3633 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3634 for (i=0;i<n_V;i++) { 3635 PetscScalar val; 3636 PetscInt idx; 3637 3638 idx = idxs_V[i]; 3639 val = array[idxs_all[idxs_V[i]]]; 3640 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3641 } 3642 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3643 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3644 sub_schurs->S_Ej_all = S_new; 3645 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3646 if (sub_schurs->sum_S_Ej_all) { 3647 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3648 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3649 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3650 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3651 sub_schurs->sum_S_Ej_all = S_new; 3652 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3653 } 3654 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3655 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3656 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3657 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3658 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3659 } 3660 } 3661 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3662 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3663 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3664 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3665 } 3666 3667 /* set up change of basis context */ 3668 if (pcbddc->ChangeOfBasisMatrix) { 3669 PCBDDCChange_ctx change_ctx; 3670 3671 if (!pcbddc->new_global_mat) { 3672 PetscInt global_size,local_size; 3673 3674 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3675 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3676 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3677 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3678 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3679 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3680 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3681 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3682 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3683 } else { 3684 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3685 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3686 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3687 } 3688 if (!pcbddc->user_ChangeOfBasisMatrix) { 3689 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3690 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3691 } else { 3692 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3693 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3694 } 3695 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3696 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3697 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 } 3700 3701 /* add pressure dofs to set of primal nodes for numbering purposes */ 3702 for (i=0;i<pcbddc->benign_n;i++) { 3703 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3704 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3705 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3706 pcbddc->local_primal_size_cc++; 3707 pcbddc->local_primal_size++; 3708 } 3709 3710 /* check if a new primal space has been introduced (also take into account benign trick) */ 3711 pcbddc->new_primal_space_local = PETSC_TRUE; 3712 if (olocal_primal_size == pcbddc->local_primal_size) { 3713 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3714 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3715 if (!pcbddc->new_primal_space_local) { 3716 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3717 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3718 } 3719 } 3720 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3721 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3722 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3723 3724 /* flush dbg viewer */ 3725 if (pcbddc->dbg_flag) { 3726 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3727 } 3728 3729 /* free workspace */ 3730 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3731 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3732 if (!pcbddc->adaptive_selection) { 3733 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3734 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3735 } else { 3736 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3737 pcbddc->adaptive_constraints_idxs_ptr, 3738 pcbddc->adaptive_constraints_data_ptr, 3739 pcbddc->adaptive_constraints_idxs, 3740 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3741 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3742 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3743 } 3744 PetscFunctionReturn(0); 3745 } 3746 3747 #undef __FUNCT__ 3748 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3749 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3750 { 3751 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3752 PC_IS *pcis = (PC_IS*)pc->data; 3753 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3754 PetscInt ierr,i,vertex_size,N; 3755 PetscViewer viewer=pcbddc->dbg_viewer; 3756 3757 PetscFunctionBegin; 3758 /* Reset previously computed graph */ 3759 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3760 /* Init local Graph struct */ 3761 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3762 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3763 3764 /* Check validity of the csr graph passed in by the user */ 3765 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3766 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3767 } 3768 3769 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3770 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3771 PetscInt *xadj,*adjncy; 3772 PetscInt nvtxs; 3773 PetscBool flg_row=PETSC_FALSE; 3774 3775 if (pcbddc->use_local_adj) { 3776 3777 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3778 if (flg_row) { 3779 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3780 pcbddc->computed_rowadj = PETSC_TRUE; 3781 } 3782 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3783 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3784 IS is_dummy; 3785 ISLocalToGlobalMapping l2gmap_dummy; 3786 PetscInt j,sum; 3787 PetscInt *cxadj,*cadjncy; 3788 const PetscInt *idxs; 3789 PCBDDCGraph graph; 3790 PetscBT is_on_boundary; 3791 3792 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3793 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3794 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3795 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3796 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3797 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3798 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3799 if (flg_row) { 3800 graph->xadj = xadj; 3801 graph->adjncy = adjncy; 3802 } 3803 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3804 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3805 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3806 3807 if (pcbddc->dbg_flag) { 3808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3809 for (i=0;i<graph->ncc;i++) { 3810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3811 } 3812 } 3813 3814 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3815 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3816 for (i=0;i<pcis->n_B;i++) { 3817 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3818 } 3819 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3820 3821 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3822 sum = 0; 3823 for (i=0;i<graph->ncc;i++) { 3824 PetscInt sizecc = 0; 3825 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3826 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3827 sizecc++; 3828 } 3829 } 3830 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3831 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3832 cxadj[graph->queue[j]] = sizecc; 3833 } 3834 } 3835 sum += sizecc*sizecc; 3836 } 3837 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3838 sum = 0; 3839 for (i=0;i<pcis->n;i++) { 3840 PetscInt temp = cxadj[i]; 3841 cxadj[i] = sum; 3842 sum += temp; 3843 } 3844 cxadj[pcis->n] = sum; 3845 for (i=0;i<graph->ncc;i++) { 3846 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3847 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3848 PetscInt k,sizecc = 0; 3849 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3850 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3851 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3852 sizecc++; 3853 } 3854 } 3855 } 3856 } 3857 } 3858 if (sum) { 3859 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3860 } else { 3861 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3862 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3863 } 3864 graph->xadj = 0; 3865 graph->adjncy = 0; 3866 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3867 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3868 } 3869 } 3870 if (pcbddc->dbg_flag) { 3871 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3872 } 3873 3874 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3875 vertex_size = 1; 3876 if (pcbddc->user_provided_isfordofs) { 3877 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3878 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3879 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3880 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3881 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3882 } 3883 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3884 pcbddc->n_ISForDofs = 0; 3885 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3886 } 3887 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3888 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3889 } else { 3890 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3891 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3892 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3893 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3894 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3895 } 3896 } 3897 } 3898 3899 /* Setup of Graph */ 3900 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3901 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3902 } 3903 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3904 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3905 } 3906 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3907 3908 /* attach info on disconnected subdomains if present */ 3909 if (pcbddc->n_local_subs) { 3910 PetscInt *local_subs; 3911 3912 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3913 for (i=0;i<pcbddc->n_local_subs;i++) { 3914 const PetscInt *idxs; 3915 PetscInt nl,j; 3916 3917 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3918 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3919 for (j=0;j<nl;j++) { 3920 local_subs[idxs[j]] = i; 3921 } 3922 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3923 } 3924 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3925 pcbddc->mat_graph->local_subs = local_subs; 3926 } 3927 3928 /* Graph's connected components analysis */ 3929 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3930 3931 /* print some info to stdout */ 3932 if (pcbddc->dbg_flag) { 3933 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3934 } 3935 3936 /* mark topography has done */ 3937 pcbddc->recompute_topography = PETSC_FALSE; 3938 PetscFunctionReturn(0); 3939 } 3940 3941 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3942 #undef __FUNCT__ 3943 #define __FUNCT__ "PCBDDCSubsetNumbering" 3944 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3945 { 3946 PetscSF sf; 3947 PetscLayout map; 3948 const PetscInt *idxs; 3949 PetscInt *leaf_data,*root_data,*gidxs; 3950 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3951 PetscInt n_n,nlocals,start,first_index; 3952 PetscMPIInt commsize; 3953 PetscBool first_found; 3954 PetscErrorCode ierr; 3955 3956 PetscFunctionBegin; 3957 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3958 if (subset_mult) { 3959 PetscCheckSameComm(subset,1,subset_mult,2); 3960 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3961 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3962 } 3963 /* create workspace layout for computing global indices of subset */ 3964 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3965 lbounds[0] = lbounds[1] = 0; 3966 for (i=0;i<n;i++) { 3967 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3968 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3969 } 3970 lbounds[0] = -lbounds[0]; 3971 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3972 gbounds[0] = -gbounds[0]; 3973 N = gbounds[1] - gbounds[0] + 1; 3974 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3975 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3976 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3977 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3978 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3979 3980 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3981 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3982 if (subset_mult) { 3983 const PetscInt* idxs_mult; 3984 3985 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3986 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3987 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3988 } else { 3989 for (i=0;i<n;i++) leaf_data[i] = 1; 3990 } 3991 /* local size of new subset */ 3992 n_n = 0; 3993 for (i=0;i<n;i++) n_n += leaf_data[i]; 3994 3995 /* global indexes in layout */ 3996 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3997 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3998 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3999 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4000 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4001 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4002 4003 /* reduce from leaves to roots */ 4004 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4005 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4006 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4007 4008 /* count indexes in local part of layout */ 4009 nlocals = 0; 4010 first_index = -1; 4011 first_found = PETSC_FALSE; 4012 for (i=0;i<Nl;i++) { 4013 if (!first_found && root_data[i]) { 4014 first_found = PETSC_TRUE; 4015 first_index = i; 4016 } 4017 nlocals += root_data[i]; 4018 } 4019 4020 /* cumulative of number of indexes and size of subset without holes */ 4021 #if defined(PETSC_HAVE_MPI_EXSCAN) 4022 start = 0; 4023 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4024 #else 4025 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4026 start = start-nlocals; 4027 #endif 4028 4029 if (N_n) { /* compute total size of new subset if requested */ 4030 *N_n = start + nlocals; 4031 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4032 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4033 } 4034 4035 /* adapt root data with cumulative */ 4036 if (first_found) { 4037 PetscInt old_index; 4038 4039 root_data[first_index] += start; 4040 old_index = first_index; 4041 for (i=first_index+1;i<Nl;i++) { 4042 if (root_data[i]) { 4043 root_data[i] += root_data[old_index]; 4044 old_index = i; 4045 } 4046 } 4047 } 4048 4049 /* from roots to leaves */ 4050 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4051 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4052 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4053 4054 /* create new IS with global indexes without holes */ 4055 if (subset_mult) { 4056 const PetscInt* idxs_mult; 4057 PetscInt cum; 4058 4059 cum = 0; 4060 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4061 for (i=0;i<n;i++) { 4062 PetscInt j; 4063 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4064 } 4065 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4066 } else { 4067 for (i=0;i<n;i++) { 4068 gidxs[i] = leaf_data[i]-1; 4069 } 4070 } 4071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4072 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4073 PetscFunctionReturn(0); 4074 } 4075 4076 #undef __FUNCT__ 4077 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4078 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4079 { 4080 PetscInt i,j; 4081 PetscScalar *alphas; 4082 PetscErrorCode ierr; 4083 4084 PetscFunctionBegin; 4085 /* this implements stabilized Gram-Schmidt */ 4086 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4087 for (i=0;i<n;i++) { 4088 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4089 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4090 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4091 } 4092 ierr = PetscFree(alphas);CHKERRQ(ierr); 4093 PetscFunctionReturn(0); 4094 } 4095 4096 #undef __FUNCT__ 4097 #define __FUNCT__ "MatISGetSubassemblingPattern" 4098 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4099 { 4100 IS ranks_send_to; 4101 PetscInt n_neighs,*neighs,*n_shared,**shared; 4102 PetscMPIInt size,rank,color; 4103 PetscInt *xadj,*adjncy; 4104 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4105 PetscInt i,local_size,threshold=0; 4106 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4107 PetscSubcomm subcomm; 4108 PetscErrorCode ierr; 4109 4110 PetscFunctionBegin; 4111 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4112 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4113 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4114 4115 /* Get info on mapping */ 4116 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4117 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4118 4119 /* build local CSR graph of subdomains' connectivity */ 4120 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4121 xadj[0] = 0; 4122 xadj[1] = PetscMax(n_neighs-1,0); 4123 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4124 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4125 4126 if (threshold) { 4127 PetscInt xadj_count = 0; 4128 for (i=1;i<n_neighs;i++) { 4129 if (n_shared[i] > threshold) { 4130 adjncy[xadj_count] = neighs[i]; 4131 adjncy_wgt[xadj_count] = n_shared[i]; 4132 xadj_count++; 4133 } 4134 } 4135 xadj[1] = xadj_count; 4136 } else { 4137 if (xadj[1]) { 4138 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4139 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4140 } 4141 } 4142 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4143 if (use_square) { 4144 for (i=0;i<xadj[1];i++) { 4145 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4146 } 4147 } 4148 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4149 4150 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4151 4152 /* 4153 Restrict work on active processes only. 4154 */ 4155 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4156 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4157 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4158 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4159 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4160 if (color) { 4161 ierr = PetscFree(xadj);CHKERRQ(ierr); 4162 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4163 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4164 } else { 4165 Mat subdomain_adj; 4166 IS new_ranks,new_ranks_contig; 4167 MatPartitioning partitioner; 4168 PetscInt prank,rstart=0,rend=0; 4169 PetscInt *is_indices,*oldranks; 4170 PetscBool aggregate; 4171 4172 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4173 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4174 prank = rank; 4175 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4176 /* 4177 for (i=0;i<size;i++) { 4178 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4179 } 4180 */ 4181 for (i=0;i<xadj[1];i++) { 4182 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4183 } 4184 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4185 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4186 if (aggregate) { 4187 PetscInt lrows,row,ncols,*cols; 4188 PetscMPIInt nrank; 4189 PetscScalar *vals; 4190 4191 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4192 lrows = 0; 4193 if (nrank<redprocs) { 4194 lrows = size/redprocs; 4195 if (nrank<size%redprocs) lrows++; 4196 } 4197 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4198 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4199 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4200 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4201 row = nrank; 4202 ncols = xadj[1]-xadj[0]; 4203 cols = adjncy; 4204 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4205 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4206 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4207 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4208 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4209 ierr = PetscFree(xadj);CHKERRQ(ierr); 4210 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4211 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4212 ierr = PetscFree(vals);CHKERRQ(ierr); 4213 } else { 4214 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4215 } 4216 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4217 4218 /* Partition */ 4219 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4220 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4221 if (use_vwgt) { 4222 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4223 v_wgt[0] = local_size; 4224 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4225 } 4226 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4227 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4228 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4229 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4230 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4231 4232 /* renumber new_ranks to avoid "holes" in new set of processors */ 4233 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4234 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4235 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4236 if (!redprocs) { 4237 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4238 } else { 4239 PetscInt idxs[1]; 4240 PetscMPIInt tag; 4241 MPI_Request *reqs; 4242 4243 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4244 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4245 for (i=rstart;i<rend;i++) { 4246 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4247 } 4248 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4249 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4250 ierr = PetscFree(reqs);CHKERRQ(ierr); 4251 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4252 } 4253 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4254 /* clean up */ 4255 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4256 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4257 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4258 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4259 } 4260 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4261 4262 /* assemble parallel IS for sends */ 4263 i = 1; 4264 if (color) i=0; 4265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4266 /* get back IS */ 4267 *is_sends = ranks_send_to; 4268 PetscFunctionReturn(0); 4269 } 4270 4271 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4272 4273 #undef __FUNCT__ 4274 #define __FUNCT__ "MatISSubassemble" 4275 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4276 { 4277 Mat local_mat; 4278 IS is_sends_internal; 4279 PetscInt rows,cols,new_local_rows; 4280 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4281 PetscBool ismatis,isdense,newisdense,destroy_mat; 4282 ISLocalToGlobalMapping l2gmap; 4283 PetscInt* l2gmap_indices; 4284 const PetscInt* is_indices; 4285 MatType new_local_type; 4286 /* buffers */ 4287 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4288 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4289 PetscInt *recv_buffer_idxs_local; 4290 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4291 /* MPI */ 4292 MPI_Comm comm,comm_n; 4293 PetscSubcomm subcomm; 4294 PetscMPIInt n_sends,n_recvs,commsize; 4295 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4296 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4297 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4298 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4299 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4300 PetscErrorCode ierr; 4301 4302 PetscFunctionBegin; 4303 /* TODO: add missing checks */ 4304 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4305 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4306 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4307 PetscValidLogicalCollectiveInt(mat,nis,7); 4308 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4309 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4310 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4311 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4312 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4313 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4314 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4315 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4316 PetscInt mrows,mcols,mnrows,mncols; 4317 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4318 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4319 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4320 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4321 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4322 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4323 } 4324 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4325 PetscValidLogicalCollectiveInt(mat,bs,0); 4326 /* prepare IS for sending if not provided */ 4327 if (!is_sends) { 4328 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4329 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4330 } else { 4331 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4332 is_sends_internal = is_sends; 4333 } 4334 4335 /* get comm */ 4336 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4337 4338 /* compute number of sends */ 4339 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4340 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4341 4342 /* compute number of receives */ 4343 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4344 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4345 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4346 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4347 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4348 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4349 ierr = PetscFree(iflags);CHKERRQ(ierr); 4350 4351 /* restrict comm if requested */ 4352 subcomm = 0; 4353 destroy_mat = PETSC_FALSE; 4354 if (restrict_comm) { 4355 PetscMPIInt color,subcommsize; 4356 4357 color = 0; 4358 if (restrict_full) { 4359 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4360 } else { 4361 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4362 } 4363 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4364 subcommsize = commsize - subcommsize; 4365 /* check if reuse has been requested */ 4366 if (reuse == MAT_REUSE_MATRIX) { 4367 if (*mat_n) { 4368 PetscMPIInt subcommsize2; 4369 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4370 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4371 comm_n = PetscObjectComm((PetscObject)*mat_n); 4372 } else { 4373 comm_n = PETSC_COMM_SELF; 4374 } 4375 } else { /* MAT_INITIAL_MATRIX */ 4376 PetscMPIInt rank; 4377 4378 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4379 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4380 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4381 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4382 comm_n = PetscSubcommChild(subcomm); 4383 } 4384 /* flag to destroy *mat_n if not significative */ 4385 if (color) destroy_mat = PETSC_TRUE; 4386 } else { 4387 comm_n = comm; 4388 } 4389 4390 /* prepare send/receive buffers */ 4391 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4392 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4393 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4394 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4395 if (nis) { 4396 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4397 } 4398 4399 /* Get data from local matrices */ 4400 if (!isdense) { 4401 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4402 /* TODO: See below some guidelines on how to prepare the local buffers */ 4403 /* 4404 send_buffer_vals should contain the raw values of the local matrix 4405 send_buffer_idxs should contain: 4406 - MatType_PRIVATE type 4407 - PetscInt size_of_l2gmap 4408 - PetscInt global_row_indices[size_of_l2gmap] 4409 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4410 */ 4411 } else { 4412 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4413 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4414 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4415 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4416 send_buffer_idxs[1] = i; 4417 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4418 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4419 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4420 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4421 for (i=0;i<n_sends;i++) { 4422 ilengths_vals[is_indices[i]] = len*len; 4423 ilengths_idxs[is_indices[i]] = len+2; 4424 } 4425 } 4426 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4427 /* additional is (if any) */ 4428 if (nis) { 4429 PetscMPIInt psum; 4430 PetscInt j; 4431 for (j=0,psum=0;j<nis;j++) { 4432 PetscInt plen; 4433 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4434 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4435 psum += len+1; /* indices + lenght */ 4436 } 4437 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4438 for (j=0,psum=0;j<nis;j++) { 4439 PetscInt plen; 4440 const PetscInt *is_array_idxs; 4441 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4442 send_buffer_idxs_is[psum] = plen; 4443 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4444 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4445 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4446 psum += plen+1; /* indices + lenght */ 4447 } 4448 for (i=0;i<n_sends;i++) { 4449 ilengths_idxs_is[is_indices[i]] = psum; 4450 } 4451 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4452 } 4453 4454 buf_size_idxs = 0; 4455 buf_size_vals = 0; 4456 buf_size_idxs_is = 0; 4457 for (i=0;i<n_recvs;i++) { 4458 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4459 buf_size_vals += (PetscInt)olengths_vals[i]; 4460 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4461 } 4462 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4463 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4464 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4465 4466 /* get new tags for clean communications */ 4467 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4468 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4469 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4470 4471 /* allocate for requests */ 4472 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4473 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4474 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4475 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4476 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4477 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4478 4479 /* communications */ 4480 ptr_idxs = recv_buffer_idxs; 4481 ptr_vals = recv_buffer_vals; 4482 ptr_idxs_is = recv_buffer_idxs_is; 4483 for (i=0;i<n_recvs;i++) { 4484 source_dest = onodes[i]; 4485 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4486 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4487 ptr_idxs += olengths_idxs[i]; 4488 ptr_vals += olengths_vals[i]; 4489 if (nis) { 4490 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4491 ptr_idxs_is += olengths_idxs_is[i]; 4492 } 4493 } 4494 for (i=0;i<n_sends;i++) { 4495 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4496 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4497 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4498 if (nis) { 4499 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4500 } 4501 } 4502 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4503 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4504 4505 /* assemble new l2g map */ 4506 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4507 ptr_idxs = recv_buffer_idxs; 4508 new_local_rows = 0; 4509 for (i=0;i<n_recvs;i++) { 4510 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4511 ptr_idxs += olengths_idxs[i]; 4512 } 4513 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4514 ptr_idxs = recv_buffer_idxs; 4515 new_local_rows = 0; 4516 for (i=0;i<n_recvs;i++) { 4517 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4518 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4519 ptr_idxs += olengths_idxs[i]; 4520 } 4521 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4522 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4523 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4524 4525 /* infer new local matrix type from received local matrices type */ 4526 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4527 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4528 if (n_recvs) { 4529 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4530 ptr_idxs = recv_buffer_idxs; 4531 for (i=0;i<n_recvs;i++) { 4532 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4533 new_local_type_private = MATAIJ_PRIVATE; 4534 break; 4535 } 4536 ptr_idxs += olengths_idxs[i]; 4537 } 4538 switch (new_local_type_private) { 4539 case MATDENSE_PRIVATE: 4540 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4541 new_local_type = MATSEQAIJ; 4542 bs = 1; 4543 } else { /* if I receive only 1 dense matrix */ 4544 new_local_type = MATSEQDENSE; 4545 bs = 1; 4546 } 4547 break; 4548 case MATAIJ_PRIVATE: 4549 new_local_type = MATSEQAIJ; 4550 bs = 1; 4551 break; 4552 case MATBAIJ_PRIVATE: 4553 new_local_type = MATSEQBAIJ; 4554 break; 4555 case MATSBAIJ_PRIVATE: 4556 new_local_type = MATSEQSBAIJ; 4557 break; 4558 default: 4559 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4560 break; 4561 } 4562 } else { /* by default, new_local_type is seqdense */ 4563 new_local_type = MATSEQDENSE; 4564 bs = 1; 4565 } 4566 4567 /* create MATIS object if needed */ 4568 if (reuse == MAT_INITIAL_MATRIX) { 4569 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4570 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4571 } else { 4572 /* it also destroys the local matrices */ 4573 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4574 } 4575 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4576 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4577 4578 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4579 4580 /* Global to local map of received indices */ 4581 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4582 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4583 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4584 4585 /* restore attributes -> type of incoming data and its size */ 4586 buf_size_idxs = 0; 4587 for (i=0;i<n_recvs;i++) { 4588 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4589 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4590 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4591 } 4592 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4593 4594 /* set preallocation */ 4595 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4596 if (!newisdense) { 4597 PetscInt *new_local_nnz=0; 4598 4599 ptr_vals = recv_buffer_vals; 4600 ptr_idxs = recv_buffer_idxs_local; 4601 if (n_recvs) { 4602 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4603 } 4604 for (i=0;i<n_recvs;i++) { 4605 PetscInt j; 4606 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4607 for (j=0;j<*(ptr_idxs+1);j++) { 4608 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4609 } 4610 } else { 4611 /* TODO */ 4612 } 4613 ptr_idxs += olengths_idxs[i]; 4614 } 4615 if (new_local_nnz) { 4616 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4617 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4618 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4619 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4620 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4621 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4622 } else { 4623 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4624 } 4625 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4626 } else { 4627 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4628 } 4629 4630 /* set values */ 4631 ptr_vals = recv_buffer_vals; 4632 ptr_idxs = recv_buffer_idxs_local; 4633 for (i=0;i<n_recvs;i++) { 4634 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4635 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4636 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4637 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4638 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4639 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4640 } else { 4641 /* TODO */ 4642 } 4643 ptr_idxs += olengths_idxs[i]; 4644 ptr_vals += olengths_vals[i]; 4645 } 4646 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4647 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4648 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4649 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4650 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4651 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4652 4653 #if 0 4654 if (!restrict_comm) { /* check */ 4655 Vec lvec,rvec; 4656 PetscReal infty_error; 4657 4658 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4659 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4660 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4661 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4662 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4663 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4664 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4665 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4666 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4667 } 4668 #endif 4669 4670 /* assemble new additional is (if any) */ 4671 if (nis) { 4672 PetscInt **temp_idxs,*count_is,j,psum; 4673 4674 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4675 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4676 ptr_idxs = recv_buffer_idxs_is; 4677 psum = 0; 4678 for (i=0;i<n_recvs;i++) { 4679 for (j=0;j<nis;j++) { 4680 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4681 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4682 psum += plen; 4683 ptr_idxs += plen+1; /* shift pointer to received data */ 4684 } 4685 } 4686 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4687 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4688 for (i=1;i<nis;i++) { 4689 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4690 } 4691 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4692 ptr_idxs = recv_buffer_idxs_is; 4693 for (i=0;i<n_recvs;i++) { 4694 for (j=0;j<nis;j++) { 4695 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4696 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4697 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4698 ptr_idxs += plen+1; /* shift pointer to received data */ 4699 } 4700 } 4701 for (i=0;i<nis;i++) { 4702 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4703 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4704 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4705 } 4706 ierr = PetscFree(count_is);CHKERRQ(ierr); 4707 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4708 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4709 } 4710 /* free workspace */ 4711 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4712 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4713 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4714 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4715 if (isdense) { 4716 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4717 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4718 } else { 4719 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4720 } 4721 if (nis) { 4722 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4723 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4724 } 4725 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4726 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4727 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4728 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4729 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4730 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4731 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4732 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4733 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4734 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4735 ierr = PetscFree(onodes);CHKERRQ(ierr); 4736 if (nis) { 4737 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4738 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4739 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4740 } 4741 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4742 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4743 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4744 for (i=0;i<nis;i++) { 4745 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4746 } 4747 *mat_n = NULL; 4748 } 4749 PetscFunctionReturn(0); 4750 } 4751 4752 /* temporary hack into ksp private data structure */ 4753 #include <petsc/private/kspimpl.h> 4754 4755 #undef __FUNCT__ 4756 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4757 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4758 { 4759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4760 PC_IS *pcis = (PC_IS*)pc->data; 4761 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4762 MatNullSpace CoarseNullSpace=NULL; 4763 ISLocalToGlobalMapping coarse_islg; 4764 IS coarse_is,*isarray; 4765 PetscInt i,im_active=-1,active_procs=-1; 4766 PetscInt nis,nisdofs,nisneu; 4767 PC pc_temp; 4768 PCType coarse_pc_type; 4769 KSPType coarse_ksp_type; 4770 PetscBool multilevel_requested,multilevel_allowed; 4771 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4772 Mat t_coarse_mat_is; 4773 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4774 PetscMPIInt all_procs; 4775 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4776 PetscBool compute_vecs = PETSC_FALSE; 4777 PetscScalar *array; 4778 PetscErrorCode ierr; 4779 4780 PetscFunctionBegin; 4781 /* Assign global numbering to coarse dofs */ 4782 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4783 PetscInt ocoarse_size; 4784 compute_vecs = PETSC_TRUE; 4785 ocoarse_size = pcbddc->coarse_size; 4786 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4787 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4788 /* see if we can avoid some work */ 4789 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4790 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4791 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4792 PC pc; 4793 PetscBool isbddc; 4794 4795 /* temporary workaround since PCBDDC does not have a reset method so far */ 4796 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4797 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4798 if (isbddc) { 4799 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4800 } 4801 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4802 coarse_reuse = PETSC_FALSE; 4803 } else { /* we can safely reuse already computed coarse matrix */ 4804 coarse_reuse = PETSC_TRUE; 4805 } 4806 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4807 coarse_reuse = PETSC_FALSE; 4808 } 4809 /* reset any subassembling information */ 4810 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4811 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4812 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4813 coarse_reuse = PETSC_TRUE; 4814 } 4815 4816 /* count "active" (i.e. with positive local size) and "void" processes */ 4817 im_active = !!(pcis->n); 4818 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4819 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4820 void_procs = all_procs-active_procs; 4821 csin_type_simple = PETSC_TRUE; 4822 redist = PETSC_FALSE; 4823 if (pcbddc->current_level && void_procs) { 4824 csin_ml = PETSC_TRUE; 4825 ncoarse_ml = void_procs; 4826 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4827 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4828 csin_ds = PETSC_TRUE; 4829 ncoarse_ds = pcbddc->redistribute_coarse; 4830 redist = PETSC_TRUE; 4831 } else { 4832 csin_ds = PETSC_TRUE; 4833 ncoarse_ds = active_procs; 4834 redist = PETSC_TRUE; 4835 } 4836 } else { 4837 csin_ml = PETSC_FALSE; 4838 ncoarse_ml = all_procs; 4839 if (void_procs) { 4840 csin_ds = PETSC_TRUE; 4841 ncoarse_ds = void_procs; 4842 csin_type_simple = PETSC_FALSE; 4843 } else { 4844 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4845 csin_ds = PETSC_TRUE; 4846 ncoarse_ds = pcbddc->redistribute_coarse; 4847 redist = PETSC_TRUE; 4848 } else { 4849 csin_ds = PETSC_FALSE; 4850 ncoarse_ds = all_procs; 4851 } 4852 } 4853 } 4854 4855 /* 4856 test if we can go multilevel: three conditions must be satisfied: 4857 - we have not exceeded the number of levels requested 4858 - we can actually subassemble the active processes 4859 - we can find a suitable number of MPI processes where we can place the subassembled problem 4860 */ 4861 multilevel_allowed = PETSC_FALSE; 4862 multilevel_requested = PETSC_FALSE; 4863 if (pcbddc->current_level < pcbddc->max_levels) { 4864 multilevel_requested = PETSC_TRUE; 4865 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4866 multilevel_allowed = PETSC_FALSE; 4867 } else { 4868 multilevel_allowed = PETSC_TRUE; 4869 } 4870 } 4871 /* determine number of process partecipating to coarse solver */ 4872 if (multilevel_allowed) { 4873 ncoarse = ncoarse_ml; 4874 csin = csin_ml; 4875 redist = PETSC_FALSE; 4876 } else { 4877 ncoarse = ncoarse_ds; 4878 csin = csin_ds; 4879 } 4880 4881 /* creates temporary l2gmap and IS for coarse indexes */ 4882 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4883 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4884 4885 /* creates temporary MATIS object for coarse matrix */ 4886 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4887 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4888 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4889 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4890 #if 0 4891 { 4892 PetscViewer viewer; 4893 char filename[256]; 4894 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4895 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4896 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4897 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4898 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4899 } 4900 #endif 4901 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4902 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4903 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4904 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4905 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4906 4907 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4908 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4909 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4910 const PetscInt *idxs; 4911 ISLocalToGlobalMapping tmap; 4912 4913 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4914 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4915 /* allocate space for temporary storage */ 4916 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4917 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4918 /* allocate for IS array */ 4919 nisdofs = pcbddc->n_ISForDofsLocal; 4920 nisneu = !!pcbddc->NeumannBoundariesLocal; 4921 nis = nisdofs + nisneu; 4922 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4923 /* dofs splitting */ 4924 for (i=0;i<nisdofs;i++) { 4925 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4926 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4927 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4928 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4929 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4930 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4931 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4932 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4933 } 4934 /* neumann boundaries */ 4935 if (pcbddc->NeumannBoundariesLocal) { 4936 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4937 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4938 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4939 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4940 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4941 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4942 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4943 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4944 } 4945 /* free memory */ 4946 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4947 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4948 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4949 } else { 4950 nis = 0; 4951 nisdofs = 0; 4952 nisneu = 0; 4953 isarray = NULL; 4954 } 4955 /* destroy no longer needed map */ 4956 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4957 4958 /* restrict on coarse candidates (if needed) */ 4959 coarse_mat_is = NULL; 4960 if (csin) { 4961 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4962 if (redist) { 4963 PetscMPIInt rank; 4964 PetscInt spc,n_spc_p1,dest[1],destsize; 4965 4966 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4967 spc = active_procs/ncoarse; 4968 n_spc_p1 = active_procs%ncoarse; 4969 if (im_active) { 4970 destsize = 1; 4971 if (rank > n_spc_p1*(spc+1)-1) { 4972 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4973 } else { 4974 dest[0] = rank/(spc+1); 4975 } 4976 } else { 4977 destsize = 0; 4978 } 4979 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4980 } else if (csin_type_simple) { 4981 PetscMPIInt rank; 4982 PetscInt issize,isidx; 4983 4984 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4985 if (im_active) { 4986 issize = 1; 4987 isidx = (PetscInt)rank; 4988 } else { 4989 issize = 0; 4990 isidx = -1; 4991 } 4992 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4993 } else { /* get a suitable subassembling pattern from MATIS code */ 4994 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4995 } 4996 4997 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4998 if (!redist || ncoarse <= void_procs) { 4999 PetscInt ncoarse_cand,tissize,*nisindices; 5000 PetscInt *coarse_candidates; 5001 const PetscInt* tisindices; 5002 5003 /* get coarse candidates' ranks in pc communicator */ 5004 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5005 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5006 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5007 if (!coarse_candidates[i]) { 5008 coarse_candidates[ncoarse_cand++]=i; 5009 } 5010 } 5011 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5012 5013 5014 if (pcbddc->dbg_flag) { 5015 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5016 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5017 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5018 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5019 for (i=0;i<ncoarse_cand;i++) { 5020 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5021 } 5022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5024 } 5025 /* shift the pattern on coarse candidates */ 5026 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5027 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5028 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5029 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5030 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5031 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5032 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5033 } 5034 if (pcbddc->dbg_flag) { 5035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5037 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5038 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5039 } 5040 } 5041 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5042 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5043 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5044 } else { /* this is the last level, so use just receiving processes in subcomm */ 5045 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5046 } 5047 } else { 5048 if (pcbddc->dbg_flag) { 5049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5050 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5051 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5052 } 5053 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5054 coarse_mat_is = t_coarse_mat_is; 5055 } 5056 5057 /* create local to global scatters for coarse problem */ 5058 if (compute_vecs) { 5059 PetscInt lrows; 5060 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5061 if (coarse_mat_is) { 5062 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5063 } else { 5064 lrows = 0; 5065 } 5066 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5067 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5068 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5069 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5070 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5071 } 5072 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5073 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5074 5075 /* set defaults for coarse KSP and PC */ 5076 if (multilevel_allowed) { 5077 coarse_ksp_type = KSPRICHARDSON; 5078 coarse_pc_type = PCBDDC; 5079 } else { 5080 coarse_ksp_type = KSPPREONLY; 5081 coarse_pc_type = PCREDUNDANT; 5082 } 5083 5084 /* print some info if requested */ 5085 if (pcbddc->dbg_flag) { 5086 if (!multilevel_allowed) { 5087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5088 if (multilevel_requested) { 5089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5090 } else if (pcbddc->max_levels) { 5091 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5092 } 5093 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5094 } 5095 } 5096 5097 /* create the coarse KSP object only once with defaults */ 5098 if (coarse_mat_is) { 5099 MatReuse coarse_mat_reuse; 5100 PetscViewer dbg_viewer = NULL; 5101 if (pcbddc->dbg_flag) { 5102 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5103 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5104 } 5105 if (!pcbddc->coarse_ksp) { 5106 char prefix[256],str_level[16]; 5107 size_t len; 5108 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5109 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5110 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5111 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5112 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5113 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5114 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5115 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5116 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5117 /* prefix */ 5118 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5119 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5120 if (!pcbddc->current_level) { 5121 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5122 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5123 } else { 5124 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5125 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5126 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5127 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5128 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5129 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5130 } 5131 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5132 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5133 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5134 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5135 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5136 /* allow user customization */ 5137 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5138 } 5139 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5140 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5141 if (nisdofs) { 5142 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5143 for (i=0;i<nisdofs;i++) { 5144 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5145 } 5146 } 5147 if (nisneu) { 5148 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5149 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5150 } 5151 5152 /* get some info after set from options */ 5153 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5154 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5155 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5156 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5157 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5158 isbddc = PETSC_FALSE; 5159 } 5160 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5161 if (isredundant) { 5162 KSP inner_ksp; 5163 PC inner_pc; 5164 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5165 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5166 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5167 } 5168 5169 /* assemble coarse matrix */ 5170 if (coarse_reuse) { 5171 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5172 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5173 coarse_mat_reuse = MAT_REUSE_MATRIX; 5174 } else { 5175 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5176 } 5177 if (isbddc || isnn) { 5178 if (pcbddc->coarsening_ratio > 1) { 5179 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5180 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5181 if (pcbddc->dbg_flag) { 5182 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5183 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5184 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5185 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5186 } 5187 } 5188 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5189 } else { 5190 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5191 coarse_mat = coarse_mat_is; 5192 } 5193 } else { 5194 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5195 } 5196 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5197 5198 /* propagate symmetry info of coarse matrix */ 5199 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5200 if (pc->pmat->symmetric_set) { 5201 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5202 } 5203 if (pc->pmat->hermitian_set) { 5204 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5205 } 5206 if (pc->pmat->spd_set) { 5207 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5208 } 5209 /* set operators */ 5210 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5211 if (pcbddc->dbg_flag) { 5212 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5213 } 5214 } else { /* processes non partecipating to coarse solver (if any) */ 5215 coarse_mat = 0; 5216 } 5217 ierr = PetscFree(isarray);CHKERRQ(ierr); 5218 #if 0 5219 { 5220 PetscViewer viewer; 5221 char filename[256]; 5222 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5223 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5224 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5225 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5226 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5227 } 5228 #endif 5229 5230 /* Compute coarse null space (special handling by BDDC only) */ 5231 #if 0 5232 if (pcbddc->NullSpace) { 5233 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5234 } 5235 #endif 5236 /* hack */ 5237 if (pcbddc->coarse_ksp) { 5238 Vec crhs,csol; 5239 5240 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5241 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5242 if (!csol) { 5243 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5244 } 5245 if (!crhs) { 5246 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5247 } 5248 } 5249 5250 /* compute null space for coarse solver if the benign trick has been requested */ 5251 if (pcbddc->benign_null) { 5252 5253 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5254 for (i=0;i<pcbddc->benign_n;i++) { 5255 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5256 } 5257 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5258 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5259 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5260 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5261 if (coarse_mat) { 5262 Vec nullv; 5263 PetscScalar *array,*array2; 5264 PetscInt nl; 5265 5266 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5267 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5268 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5269 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5270 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5271 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5272 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5273 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5274 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5275 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5276 } 5277 } 5278 5279 if (pcbddc->coarse_ksp) { 5280 PetscBool ispreonly; 5281 5282 if (CoarseNullSpace) { 5283 PetscBool isnull; 5284 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5285 if (isnull) { 5286 if (isbddc) { 5287 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5288 } else { 5289 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5290 } 5291 } else { 5292 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5293 } 5294 } 5295 /* setup coarse ksp */ 5296 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5297 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5298 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5299 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5300 KSP check_ksp; 5301 KSPType check_ksp_type; 5302 PC check_pc; 5303 Vec check_vec,coarse_vec; 5304 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5305 PetscInt its; 5306 PetscBool compute_eigs; 5307 PetscReal *eigs_r,*eigs_c; 5308 PetscInt neigs; 5309 const char *prefix; 5310 5311 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5312 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5313 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5314 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5315 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5316 if (ispreonly) { 5317 check_ksp_type = KSPPREONLY; 5318 compute_eigs = PETSC_FALSE; 5319 } else { 5320 check_ksp_type = KSPGMRES; 5321 compute_eigs = PETSC_TRUE; 5322 } 5323 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5324 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5325 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5326 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5327 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5328 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5329 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5330 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5331 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5332 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5333 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5334 /* create random vec */ 5335 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5336 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5337 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5338 if (CoarseNullSpace) { 5339 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5340 } 5341 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5342 /* solve coarse problem */ 5343 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5344 if (CoarseNullSpace) { 5345 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5346 } 5347 /* set eigenvalue estimation if preonly has not been requested */ 5348 if (compute_eigs) { 5349 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5350 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5351 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5352 lambda_max = eigs_r[neigs-1]; 5353 lambda_min = eigs_r[0]; 5354 if (pcbddc->use_coarse_estimates) { 5355 if (lambda_max>lambda_min) { 5356 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5357 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5358 } 5359 } 5360 } 5361 5362 /* check coarse problem residual error */ 5363 if (pcbddc->dbg_flag) { 5364 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5365 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5366 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5367 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5368 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5369 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5370 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5371 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5372 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5373 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5374 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5375 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5376 if (CoarseNullSpace) { 5377 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5378 } 5379 if (compute_eigs) { 5380 PetscReal lambda_max_s,lambda_min_s; 5381 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5382 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5383 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5384 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5385 for (i=0;i<neigs;i++) { 5386 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5387 } 5388 } 5389 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5390 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5391 } 5392 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5393 if (compute_eigs) { 5394 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5395 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5396 } 5397 } 5398 } 5399 /* print additional info */ 5400 if (pcbddc->dbg_flag) { 5401 /* waits until all processes reaches this point */ 5402 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5403 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5404 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5405 } 5406 5407 /* free memory */ 5408 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5409 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5410 PetscFunctionReturn(0); 5411 } 5412 5413 #undef __FUNCT__ 5414 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5415 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5416 { 5417 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5418 PC_IS* pcis = (PC_IS*)pc->data; 5419 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5420 IS subset,subset_mult,subset_n; 5421 PetscInt local_size,coarse_size=0; 5422 PetscInt *local_primal_indices=NULL; 5423 const PetscInt *t_local_primal_indices; 5424 PetscErrorCode ierr; 5425 5426 PetscFunctionBegin; 5427 /* Compute global number of coarse dofs */ 5428 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5429 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5430 } 5431 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5432 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5433 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5434 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5435 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5436 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5437 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5438 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5439 if (local_size != pcbddc->local_primal_size) { 5440 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5441 } 5442 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5443 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5444 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5445 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5446 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5447 5448 /* check numbering */ 5449 if (pcbddc->dbg_flag) { 5450 PetscScalar coarsesum,*array,*array2; 5451 PetscInt i; 5452 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5453 5454 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5455 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5456 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5457 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5458 /* counter */ 5459 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5460 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5461 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5462 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5463 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5464 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5465 5466 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5467 for (i=0;i<pcbddc->local_primal_size;i++) { 5468 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5469 } 5470 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5471 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5472 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5473 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5474 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5475 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5476 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5477 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5478 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5479 for (i=0;i<pcis->n;i++) { 5480 if (array[i] != 0.0 && array[i] != array2[i]) { 5481 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5482 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5483 set_error = PETSC_TRUE; 5484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5485 } 5486 } 5487 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5488 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5490 for (i=0;i<pcis->n;i++) { 5491 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5492 } 5493 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5494 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5495 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5496 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5498 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5499 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5500 PetscInt *gidxs; 5501 5502 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5503 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5504 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5507 for (i=0;i<pcbddc->local_primal_size;i++) { 5508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5509 } 5510 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5511 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5512 } 5513 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5514 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5515 } 5516 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5517 /* get back data */ 5518 *coarse_size_n = coarse_size; 5519 *local_primal_indices_n = local_primal_indices; 5520 PetscFunctionReturn(0); 5521 } 5522 5523 #undef __FUNCT__ 5524 #define __FUNCT__ "PCBDDCGlobalToLocal" 5525 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5526 { 5527 IS localis_t; 5528 PetscInt i,lsize,*idxs,n; 5529 PetscScalar *vals; 5530 PetscErrorCode ierr; 5531 5532 PetscFunctionBegin; 5533 /* get indices in local ordering exploiting local to global map */ 5534 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5535 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5536 for (i=0;i<lsize;i++) vals[i] = 1.0; 5537 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5538 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5539 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5540 if (idxs) { /* multilevel guard */ 5541 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5542 } 5543 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5544 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5545 ierr = PetscFree(vals);CHKERRQ(ierr); 5546 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5547 /* now compute set in local ordering */ 5548 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5549 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5550 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5551 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5552 for (i=0,lsize=0;i<n;i++) { 5553 if (PetscRealPart(vals[i]) > 0.5) { 5554 lsize++; 5555 } 5556 } 5557 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5558 for (i=0,lsize=0;i<n;i++) { 5559 if (PetscRealPart(vals[i]) > 0.5) { 5560 idxs[lsize++] = i; 5561 } 5562 } 5563 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5564 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5565 *localis = localis_t; 5566 PetscFunctionReturn(0); 5567 } 5568 5569 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5570 #undef __FUNCT__ 5571 #define __FUNCT__ "PCBDDCMatMult_Private" 5572 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5573 { 5574 PCBDDCChange_ctx change_ctx; 5575 PetscErrorCode ierr; 5576 5577 PetscFunctionBegin; 5578 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5579 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5580 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5581 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5582 PetscFunctionReturn(0); 5583 } 5584 5585 #undef __FUNCT__ 5586 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5587 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5588 { 5589 PCBDDCChange_ctx change_ctx; 5590 PetscErrorCode ierr; 5591 5592 PetscFunctionBegin; 5593 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5594 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5595 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5596 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5597 PetscFunctionReturn(0); 5598 } 5599 5600 #undef __FUNCT__ 5601 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5602 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5603 { 5604 PC_IS *pcis=(PC_IS*)pc->data; 5605 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5606 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5607 Mat S_j; 5608 PetscInt *used_xadj,*used_adjncy; 5609 PetscBool free_used_adj; 5610 PetscErrorCode ierr; 5611 5612 PetscFunctionBegin; 5613 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5614 free_used_adj = PETSC_FALSE; 5615 if (pcbddc->sub_schurs_layers == -1) { 5616 used_xadj = NULL; 5617 used_adjncy = NULL; 5618 } else { 5619 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5620 used_xadj = pcbddc->mat_graph->xadj; 5621 used_adjncy = pcbddc->mat_graph->adjncy; 5622 } else if (pcbddc->computed_rowadj) { 5623 used_xadj = pcbddc->mat_graph->xadj; 5624 used_adjncy = pcbddc->mat_graph->adjncy; 5625 } else { 5626 PetscBool flg_row=PETSC_FALSE; 5627 const PetscInt *xadj,*adjncy; 5628 PetscInt nvtxs; 5629 5630 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5631 if (flg_row) { 5632 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5633 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5634 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5635 free_used_adj = PETSC_TRUE; 5636 } else { 5637 pcbddc->sub_schurs_layers = -1; 5638 used_xadj = NULL; 5639 used_adjncy = NULL; 5640 } 5641 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5642 } 5643 } 5644 5645 /* setup sub_schurs data */ 5646 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5647 if (!sub_schurs->use_mumps) { 5648 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5649 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5650 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5651 } else { 5652 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5653 PetscBool isseqaij; 5654 if (!pcbddc->use_vertices && reuse_solvers) { 5655 PetscInt n_vertices; 5656 5657 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5658 reuse_solvers = (PetscBool)!n_vertices; 5659 } 5660 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5661 if (!isseqaij) { 5662 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5663 if (matis->A == pcbddc->local_mat) { 5664 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5665 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5666 } else { 5667 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5668 } 5669 } 5670 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5671 } 5672 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5673 5674 /* free adjacency */ 5675 if (free_used_adj) { 5676 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5677 } 5678 PetscFunctionReturn(0); 5679 } 5680 5681 #undef __FUNCT__ 5682 #define __FUNCT__ "PCBDDCInitSubSchurs" 5683 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5684 { 5685 PC_IS *pcis=(PC_IS*)pc->data; 5686 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5687 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5688 PCBDDCGraph graph; 5689 PetscErrorCode ierr; 5690 5691 PetscFunctionBegin; 5692 /* attach interface graph for determining subsets */ 5693 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5694 IS verticesIS,verticescomm; 5695 PetscInt vsize,*idxs; 5696 5697 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5698 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5699 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5700 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5701 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5702 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5703 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5704 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5705 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5706 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5707 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5708 /* 5709 if (pcbddc->dbg_flag) { 5710 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5711 } 5712 */ 5713 } else { 5714 graph = pcbddc->mat_graph; 5715 } 5716 5717 /* sub_schurs init */ 5718 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5719 5720 /* free graph struct */ 5721 if (pcbddc->sub_schurs_rebuild) { 5722 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5723 } 5724 PetscFunctionReturn(0); 5725 } 5726 5727 #undef __FUNCT__ 5728 #define __FUNCT__ "PCBDDCCheckOperator" 5729 PetscErrorCode PCBDDCCheckOperator(PC pc) 5730 { 5731 PC_IS *pcis=(PC_IS*)pc->data; 5732 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5733 PetscErrorCode ierr; 5734 5735 PetscFunctionBegin; 5736 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5737 IS zerodiag = NULL; 5738 Mat S_j,B0_B=NULL; 5739 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5740 PetscScalar *p0_check,*array,*array2; 5741 PetscReal norm; 5742 PetscInt i; 5743 5744 /* B0 and B0_B */ 5745 if (zerodiag) { 5746 IS dummy; 5747 5748 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5749 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5750 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5751 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5752 } 5753 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5754 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5755 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5756 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5757 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5758 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5759 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5760 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5761 /* S_j */ 5762 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5763 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5764 5765 /* mimic vector in \widetilde{W}_\Gamma */ 5766 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5767 /* continuous in primal space */ 5768 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5769 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5770 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5771 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5772 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5773 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5774 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5775 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5776 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5777 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5778 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5779 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5780 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5781 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5782 5783 /* assemble rhs for coarse problem */ 5784 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5785 /* local with Schur */ 5786 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5787 if (zerodiag) { 5788 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5789 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5790 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5791 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5792 } 5793 /* sum on primal nodes the local contributions */ 5794 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5795 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5796 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5797 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5798 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5799 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5800 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5801 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5802 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5803 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5804 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5805 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5806 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5807 /* scale primal nodes (BDDC sums contibutions) */ 5808 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5809 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5810 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5811 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5812 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5813 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5815 /* global: \widetilde{B0}_B w_\Gamma */ 5816 if (zerodiag) { 5817 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5818 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5819 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5820 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5821 } 5822 /* BDDC */ 5823 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5824 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5825 5826 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5827 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5828 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5829 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5830 for (i=0;i<pcbddc->benign_n;i++) { 5831 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5832 } 5833 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5834 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5835 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5836 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5837 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5838 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5839 } 5840 PetscFunctionReturn(0); 5841 } 5842