1 /* 2 Defines projective product routines where A is a AIJ matrix 3 C = P^T * A * P 4 */ 5 6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7 #include "src/mat/utils/freespace.h" 8 #include "src/mat/impls/aij/mpi/mpiaij.h" 9 #include "petscbt.h" 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatPtAP" 13 /*@ 14 MatPtAP - Creates the matrix projection C = P^T * A * P 15 16 Collective on Mat 17 18 Input Parameters: 19 + A - the matrix 20 . P - the projection matrix 21 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22 - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 23 24 Output Parameters: 25 . C - the product matrix 26 27 Notes: 28 C will be created and must be destroyed by the user with MatDestroy(). 29 30 This routine is currently only implemented for pairs of SeqAIJ matrices and classes 31 which inherit from SeqAIJ. C will be of type MATSEQAIJ. 32 33 Level: intermediate 34 35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 36 @*/ 37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 38 { 39 PetscErrorCode ierr; 40 PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41 PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42 43 PetscFunctionBegin; 44 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 45 PetscValidType(A,1); 46 MatPreallocated(A); 47 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 48 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 49 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 50 PetscValidType(P,2); 51 MatPreallocated(P); 52 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 53 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 54 PetscValidPointer(C,3); 55 56 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 57 58 if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59 60 /* For now, we do not 
dispatch based on the type of A and P */ 61 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62 fA = A->ops->ptap; 63 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64 fP = P->ops->ptap; 65 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67 68 ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69 ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 70 ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76 77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81 { 82 PetscErrorCode ierr; 83 Mat_MatMatMultMPI *ptap; 84 PetscObjectContainer container; 85 86 PetscFunctionBegin; 87 ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88 if (container) { 89 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90 ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91 ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92 ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 93 ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94 95 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 96 ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97 } 98 ierr = PetscFree(ptap);CHKERRQ(ierr); 99 100 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 
107 { 108 PetscErrorCode ierr; 109 Mat_MatMatMultMPI *ptap; 110 PetscObjectContainer container; 111 112 PetscFunctionBegin; 113 if (scall == MAT_INITIAL_MATRIX){ 114 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 115 ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 116 117 /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119 120 /* get P_loc by taking all local rows of P */ 121 ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122 123 /* attach the supporting struct to P for reuse */ 124 P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126 ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127 ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128 129 /* now, compute symbolic local P^T*A*P */ 130 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132 } else if (scall == MAT_REUSE_MATRIX){ 133 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134 if (container) { 135 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136 } else { 137 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138 } 139 /* update P_oth */ 140 ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 141 142 } else { 143 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall); 144 } 145 /* now, compute numeric local P^T*A*P */ 146 ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 147 ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 148 ierr = 
PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 149 150 PetscFunctionReturn(0); 151 } 152 153 #undef __FUNCT__ 154 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 155 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 156 { 157 PetscErrorCode ierr; 158 Mat C_seq; 159 Mat_MatMatMultMPI *ptap; 160 PetscObjectContainer container; 161 162 PetscFunctionBegin; 163 164 ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 165 if (container) { 166 ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 167 } else { 168 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 169 } 170 /* compute C_seq = P_loc^T * A_loc * P_seq */ 171 ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 172 173 /* add C_seq into mpi C */ 174 ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 175 176 PetscFunctionReturn(0); 177 } 178 179 #undef __FUNCT__ 180 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 181 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 182 { 183 PetscErrorCode ierr; 184 Mat_Merge_SeqsToMPI *merge; 185 Mat_MatMatMultMPI *ptap; 186 PetscObjectContainer cont_merge,cont_ptap; 187 PetscInt flops=0; 188 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; 189 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data, 190 *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data, 191 *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 192 Mat C_seq; 193 Mat_SeqAIJ *cseq,*p_oth; 194 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend; 195 PetscInt *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj; 196 PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 197 PetscInt *cjj; 198 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 199 MatScalar 
*pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i,*ca;
  PetscInt             *cseqi,*cseqj,col;


  PetscFunctionBegin;
  /* Retrieve the merge info cached on C and the PtAP support data cached on P;
     both are created during the symbolic phase and are required here. */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }

  /* CSR arrays of P_oth (rows of P owned by other processes, fetched in symbolic phase) */
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  /* C_seq holds this process's contribution before the parallel merge */
  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P: one block of
     MatScalar values (apa) followed by two PetscInt arrays (apj, apjdense) */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_Seq and C */
  /* NOTE(review): cseqi is indexed with cN (global column count of C) here;
     this assumes C_seq has cN rows -- TODO confirm against MatMerge_SeqsToMPISymbolic */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A */
    nzi = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj = pd->j + pd->i[prow]; /* local col index of P */
      paj = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          /* first touch of this column: record it in the sparse index list */
          apjdense[col] = -1;
          apj[apnzj++] = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj = po->j + po->i[prow]; /* local col index of P */
      paj = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++] = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A: multiply against P_oth (rows fetched from other ranks) */
    nzi = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj = pj_oth + pi_oth[prow];
      paj = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++] = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      crow = (*pJ_d++) + owners[rank]; /* global row of C owned by this rank */
      cjj = cseqj + cseqi[crow];
      caj = cseqa + cseqi[crow];
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow = p->garray[*pJ_o++]; /* row belongs to another rank: accumulate in C_seq */
      cjj = cseqj + cseqi[crow];
      caj = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      /* sparse axpy: assumes cjj (row pattern of C_seq) contains all of apj */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]] = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc]; /* first row of C_seq destined for this process */
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  /* one triple of cursors per received message */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow   = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
    nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i; /* global row number of C */
    bj_i = bj+bi[i]; /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        /* merge the received sparse row into the (sorted) pattern of row i */
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
/*
   SeqAIJ PtAP driver: symbolic phase on MAT_INITIAL_MATRIX (which allocates *C),
   numeric phase always.
*/
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  P - the projection matrix

   Output Parameters:
.  C - the (i,j) structure of the product matrix

   Notes:
   C will be created and must be destroyed by the user with MatDestroy().

   This routine is currently only implemented for pairs of SeqAIJ matrices and classes
   which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
   this (i,j) structure by calling MatPtAPNumeric().
472 473 Level: intermediate 474 475 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 476 */ 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatPtAPSymbolic" 479 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 480 { 481 PetscErrorCode ierr; 482 PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 483 PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 484 485 PetscFunctionBegin; 486 487 PetscValidHeaderSpecific(A,MAT_COOKIE,1); 488 PetscValidType(A,1); 489 MatPreallocated(A); 490 if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 491 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 492 493 PetscValidHeaderSpecific(P,MAT_COOKIE,2); 494 PetscValidType(P,2); 495 MatPreallocated(P); 496 if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 497 if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 498 499 PetscValidPointer(C,3); 500 501 if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 502 if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 503 504 /* For now, we do not dispatch based on the type of A and P */ 505 /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. 
*/ 506 fA = A->ops->ptapsymbolic; 507 if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 508 fP = P->ops->ptapsymbolic; 509 if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 510 if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 511 512 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 513 ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 514 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 515 516 PetscFunctionReturn(0); 517 } 518 519 typedef struct { 520 Mat symAP; 521 } Mat_PtAPstruct; 522 523 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 524 525 #undef __FUNCT__ 526 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 527 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 528 { 529 PetscErrorCode ierr; 530 Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 531 532 PetscFunctionBegin; 533 ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 534 ierr = PetscFree(ptap);CHKERRQ(ierr); 535 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 541 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 542 { 543 PetscErrorCode ierr; 544 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 545 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 546 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 547 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 548 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 549 PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 550 MatScalar *ca; 551 PetscBT lnkbt; 552 553 PetscFunctionBegin; 554 /* Get ij structure of P^T */ 555 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 556 ptJ=ptj; 557 558 /* Allocate ci array, arrays for fill computation and */ 559 /* free space for 
accumulating nonzero column info */ 560 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 561 ci[0] = 0; 562 563 ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 564 ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 565 ptasparserow = ptadenserow + an; 566 567 /* create and initialize a linked list */ 568 nlnk = pn+1; 569 ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 570 571 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 572 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 573 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 574 current_space = free_space; 575 576 /* Determine symbolic info for each row of C: */ 577 for (i=0;i<pn;i++) { 578 ptnzi = pti[i+1] - pti[i]; 579 ptanzi = 0; 580 /* Determine symbolic row of PtA: */ 581 for (j=0;j<ptnzi;j++) { 582 arow = *ptJ++; 583 anzj = ai[arow+1] - ai[arow]; 584 ajj = aj + ai[arow]; 585 for (k=0;k<anzj;k++) { 586 if (!ptadenserow[ajj[k]]) { 587 ptadenserow[ajj[k]] = -1; 588 ptasparserow[ptanzi++] = ajj[k]; 589 } 590 } 591 } 592 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 593 ptaj = ptasparserow; 594 cnzi = 0; 595 for (j=0;j<ptanzi;j++) { 596 prow = *ptaj++; 597 pnzj = pi[prow+1] - pi[prow]; 598 pjj = pj + pi[prow]; 599 /* add non-zero cols of P into the sorted linked list lnk */ 600 ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 601 cnzi += nlnk; 602 } 603 604 /* If free space is not available, make more free space */ 605 /* Double the amount of total space in the list */ 606 if (current_space->local_remaining<cnzi) { 607 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 608 } 609 610 /* Copy data into free space, and zero out denserows */ 611 ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 612 current_space->array += cnzi; 613 current_space->local_used += cnzi; 614 
current_space->local_remaining -= cnzi; 615 616 for (j=0;j<ptanzi;j++) { 617 ptadenserow[ptasparserow[j]] = 0; 618 } 619 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 620 /* For now, we will recompute what is needed. */ 621 ci[i+1] = ci[i] + cnzi; 622 } 623 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 624 /* Allocate space for cj, initialize cj, and */ 625 /* destroy list of free space and other temporary array(s) */ 626 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 627 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 628 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 629 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 630 631 /* Allocate space for ca */ 632 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 633 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 634 635 /* put together the new matrix */ 636 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 637 638 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 639 /* Since these are PETSc arrays, change flags to free them as necessary. */ 640 c = (Mat_SeqAIJ *)((*C)->data); 641 c->freedata = PETSC_TRUE; 642 c->nonew = 0; 643 644 /* Clean up. */ 645 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 646 647 PetscFunctionReturn(0); 648 } 649 650 #include "src/mat/impls/maij/maij.h" 651 EXTERN_C_BEGIN 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 654 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 655 { 656 /* This routine requires testing -- I don't think it works. 
*/ 657 PetscErrorCode ierr; 658 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 659 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 660 Mat P=pp->AIJ; 661 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 662 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 663 PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 664 PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 665 PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 666 MatScalar *ca; 667 668 PetscFunctionBegin; 669 /* Start timer */ 670 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 671 672 /* Get ij structure of P^T */ 673 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 674 675 /* Allocate ci array, arrays for fill computation and */ 676 /* free space for accumulating nonzero column info */ 677 ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 678 ci[0] = 0; 679 680 ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 681 ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 682 ptasparserow = ptadenserow + an; 683 denserow = ptasparserow + an; 684 sparserow = denserow + pn; 685 686 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 687 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 688 ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 689 current_space = free_space; 690 691 /* Determine symbolic info for each row of C: */ 692 for (i=0;i<pn/ppdof;i++) { 693 ptnzi = pti[i+1] - pti[i]; 694 ptanzi = 0; 695 ptJ = ptj + pti[i]; 696 for (dof=0;dof<ppdof;dof++) { 697 /* Determine symbolic row of PtA: */ 698 for (j=0;j<ptnzi;j++) { 699 arow = ptJ[j] + dof; 700 anzj = ai[arow+1] - ai[arow]; 701 ajj = aj + ai[arow]; 702 for (k=0;k<anzj;k++) { 703 if (!ptadenserow[ajj[k]]) { 704 ptadenserow[ajj[k]] = -1; 705 ptasparserow[ptanzi++] = ajj[k]; 706 } 707 } 708 } 709 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 710 ptaj = ptasparserow; 711 cnzi = 0; 712 for (j=0;j<ptanzi;j++) { 713 pdof = *ptaj%dof; 714 prow = (*ptaj++)/dof; 715 pnzj = pi[prow+1] - pi[prow]; 716 pjj = pj + pi[prow]; 717 for (k=0;k<pnzj;k++) { 718 if (!denserow[pjj[k]+pdof]) { 719 denserow[pjj[k]+pdof] = -1; 720 sparserow[cnzi++] = pjj[k]+pdof; 721 } 722 } 723 } 724 725 /* sort sparserow */ 726 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 727 728 /* If free space is not available, make more free space */ 729 /* Double the amount of total space in the list */ 730 if (current_space->local_remaining<cnzi) { 731 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 732 } 733 734 /* Copy data into free space, and zero out denserows */ 735 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 736 current_space->array += cnzi; 737 current_space->local_used += cnzi; 738 current_space->local_remaining -= cnzi; 739 740 for (j=0;j<ptanzi;j++) { 741 ptadenserow[ptasparserow[j]] = 0; 742 } 743 for (j=0;j<cnzi;j++) { 744 denserow[sparserow[j]] = 0; 745 } 746 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 747 /* For now, we will recompute what is needed. 
*/ 748 ci[i+1+dof] = ci[i+dof] + cnzi; 749 } 750 } 751 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 752 /* Allocate space for cj, initialize cj, and */ 753 /* destroy list of free space and other temporary array(s) */ 754 ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 755 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 756 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 757 758 /* Allocate space for ca */ 759 ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 760 ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 761 762 /* put together the new matrix */ 763 ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 764 765 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 766 /* Since these are PETSc arrays, change flags to free them as necessary. */ 767 c = (Mat_SeqAIJ *)((*C)->data); 768 c->freedata = PETSC_TRUE; 769 c->nonew = 0; 770 771 /* Clean up. */ 772 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 773 774 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 775 PetscFunctionReturn(0); 776 } 777 EXTERN_C_END 778 779 /* 780 MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 781 782 Collective on Mat 783 784 Input Parameters: 785 + A - the matrix 786 - P - the projection matrix 787 788 Output Parameters: 789 . C - the product matrix 790 791 Notes: 792 C must have been created by calling MatPtAPSymbolic and must be destroyed by 793 the user using MatDeatroy(). 794 795 This routine is currently only implemented for pairs of AIJ matrices and classes 796 which inherit from AIJ. C will be of type MATAIJ. 
   Level: intermediate

.seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
*/
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  /* Validate all three matrices: must be assembled, non-factored */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* dimension checks: C must be cols(P) x cols(P), A square, cols(A) == rows(P) */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric SeqAIJ PtAP: fills the values of C (whose (i,j) structure was built
   by MatPtAPSymbolic_SeqAIJ_SeqAIJ) one row of A*P at a time, then scatters
   each A*P row into C via an outer product with the matching column of P.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P: a MatScalar block
     (apa, dense values) followed by two PetscInt arrays (apj: sparse indices,
     apjdense: occupancy flags) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          /* first touch of this column: record it in the sparse index list */
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt
i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 950 MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 951 MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 952 953 PetscFunctionBegin; 954 if (!logkey_matptapnumeric_local) { 955 ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 956 } 957 ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 958 959 /* Allocate temporary array for storage of one row of A*P */ 960 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 961 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 962 apj = (PetscInt *)(apa + cn); 963 apjdense = apj + cn; 964 965 /* Clear old values in C */ 966 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 967 968 for (i=0;i<am;i++) { 969 /* Form i-th sparse row of A*P */ 970 apnzj = 0; 971 /* diagonal portion of A */ 972 anzi = adi[i+1] - adi[i]; 973 for (j=0;j<anzi;j++) { 974 prow = *adj; 975 adj++; 976 pnzj = pi_loc[prow+1] - pi_loc[prow]; 977 pjj = pj_loc + pi_loc[prow]; 978 paj = pa_loc + pi_loc[prow]; 979 for (k=0;k<pnzj;k++) { 980 if (!apjdense[pjj[k]]) { 981 apjdense[pjj[k]] = -1; 982 apj[apnzj++] = pjj[k]; 983 } 984 apa[pjj[k]] += (*ada)*paj[k]; 985 } 986 flops += 2*pnzj; 987 ada++; 988 } 989 /* off-diagonal portion of A */ 990 anzi = aoi[i+1] - aoi[i]; 991 for (j=0;j<anzi;j++) { 992 col = a->garray[*aoj]; 993 if (col < cstart){ 994 prow = *aoj; 995 } else if (col >= cend){ 996 prow = *aoj; 997 } else { 998 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 999 } 1000 aoj++; 1001 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1002 pjj = pj_oth + pi_oth[prow]; 1003 paj = pa_oth + pi_oth[prow]; 1004 for (k=0;k<pnzj;k++) { 1005 if (!apjdense[pjj[k]]) { 1006 apjdense[pjj[k]] = -1; 1007 apj[apnzj++] = pjj[k]; 1008 } 1009 apa[pjj[k]] += (*aoa)*paj[k]; 1010 } 1011 flops += 2*pnzj; 1012 aoa++; 1013 } 1014 
/* Sort the j index array for quick sparse axpy. */ 1015 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1016 1017 /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1018 pnzi = pi_loc[i+1] - pi_loc[i]; 1019 for (j=0;j<pnzi;j++) { 1020 nextap = 0; 1021 crow = *pJ++; 1022 cjj = cj + ci[crow]; 1023 caj = ca + ci[crow]; 1024 /* Perform sparse axpy operation. Note cjj includes apj. */ 1025 for (k=0;nextap<apnzj;k++) { 1026 if (cjj[k]==apj[nextap]) { 1027 caj[k] += (*pA)*apa[apj[nextap++]]; 1028 } 1029 } 1030 flops += 2*apnzj; 1031 pA++; 1032 } 1033 1034 /* Zero the current row info for A*P */ 1035 for (j=0;j<apnzj;j++) { 1036 apa[apj[j]] = 0.; 1037 apjdense[apj[j]] = 0; 1038 } 1039 } 1040 1041 /* Assemble the final matrix and clean up */ 1042 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1044 ierr = PetscFree(apa);CHKERRQ(ierr); 1045 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1046 ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1047 PetscFunctionReturn(0); 1048 } 1049 static PetscEvent logkey_matptapsymbolic_local = 0; 1050 #undef __FUNCT__ 1051 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1052 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1053 { 1054 PetscErrorCode ierr; 1055 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1056 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1057 Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1058 PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1059 PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1060 PetscInt *ci,*cj,*ptaj; 1061 PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1062 PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1063 PetscInt pm=P_loc->m,nlnk,*lnk; 1064 MatScalar *ca; 1065 PetscBT lnkbt; 1066 PetscInt prend,nprow_loc,nprow_oth; 1067 PetscInt 
*ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1068 Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1069 PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1070 1071 PetscFunctionBegin; 1072 if (!logkey_matptapsymbolic_local) { 1073 ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1074 } 1075 ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1076 1077 prend = prstart + pm; 1078 1079 /* get ij structure of P_loc^T */ 1080 ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1081 ptJ=ptj; 1082 1083 /* Allocate ci array, arrays for fill computation and */ 1084 /* free space for accumulating nonzero column info */ 1085 ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1086 ci[0] = 0; 1087 1088 ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1089 ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 1090 ptasparserow_loc = ptadenserow_loc + aN; 1091 ptadenserow_oth = ptasparserow_loc + aN; 1092 ptasparserow_oth = ptadenserow_oth + aN; 1093 1094 /* create and initialize a linked list */ 1095 nlnk = pN+1; 1096 ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1097 1098 /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1099 /* This should be reasonable if sparsity of PtAP is similar to that of A. 
*/ 1100 nnz = adi[am] + aoi[am]; 1101 ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1102 current_space = free_space; 1103 1104 /* determine symbolic info for each row of C: */ 1105 for (i=0;i<pN;i++) { 1106 ptnzi = pti[i+1] - pti[i]; 1107 nprow_loc = 0; nprow_oth = 0; 1108 /* i-th row of symbolic P_loc^T*A_loc: */ 1109 for (j=0;j<ptnzi;j++) { 1110 arow = *ptJ++; 1111 /* diagonal portion of A */ 1112 anzj = adi[arow+1] - adi[arow]; 1113 ajj = adj + adi[arow]; 1114 for (k=0;k<anzj;k++) { 1115 col = ajj[k]+prstart; 1116 if (!ptadenserow_loc[col]) { 1117 ptadenserow_loc[col] = -1; 1118 ptasparserow_loc[nprow_loc++] = col; 1119 } 1120 } 1121 /* off-diagonal portion of A */ 1122 anzj = aoi[arow+1] - aoi[arow]; 1123 ajj = aoj + aoi[arow]; 1124 for (k=0;k<anzj;k++) { 1125 col = a->garray[ajj[k]]; /* global col */ 1126 if (col < cstart){ 1127 col = ajj[k]; 1128 } else if (col >= cend){ 1129 col = ajj[k] + pm; 1130 } else { 1131 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1132 } 1133 if (!ptadenserow_oth[col]) { 1134 ptadenserow_oth[col] = -1; 1135 ptasparserow_oth[nprow_oth++] = col; 1136 } 1137 } 1138 } 1139 1140 /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1141 cnzi = 0; 1142 /* rows of P_loc */ 1143 ptaj = ptasparserow_loc; 1144 for (j=0; j<nprow_loc; j++) { 1145 prow = *ptaj++; 1146 prow -= prstart; /* rm */ 1147 pnzj = pi_loc[prow+1] - pi_loc[prow]; 1148 pjj = pj_loc + pi_loc[prow]; 1149 /* add non-zero cols of P into the sorted linked list lnk */ 1150 ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1151 cnzi += nlnk; 1152 } 1153 /* rows of P_oth */ 1154 ptaj = ptasparserow_oth; 1155 for (j=0; j<nprow_oth; j++) { 1156 prow = *ptaj++; 1157 if (prow >= prend) prow -= pm; /* rm */ 1158 pnzj = pi_oth[prow+1] - pi_oth[prow]; 1159 pjj = pj_oth + pi_oth[prow]; 1160 /* add non-zero cols of P into the sorted linked list lnk */ 1161 ierr = 
PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1162 cnzi += nlnk; 1163 } 1164 1165 /* If free space is not available, make more free space */ 1166 /* Double the amount of total space in the list */ 1167 if (current_space->local_remaining<cnzi) { 1168 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1169 } 1170 1171 /* Copy data into free space, and zero out denserows */ 1172 ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1173 current_space->array += cnzi; 1174 current_space->local_used += cnzi; 1175 current_space->local_remaining -= cnzi; 1176 1177 for (j=0;j<nprow_loc; j++) { 1178 ptadenserow_loc[ptasparserow_loc[j]] = 0; 1179 } 1180 for (j=0;j<nprow_oth; j++) { 1181 ptadenserow_oth[ptasparserow_oth[j]] = 0; 1182 } 1183 1184 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1185 /* For now, we will recompute what is needed. */ 1186 ci[i+1] = ci[i] + cnzi; 1187 } 1188 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1189 /* Allocate space for cj, initialize cj, and */ 1190 /* destroy list of free space and other temporary array(s) */ 1191 ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1192 ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1193 ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1194 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1195 1196 /* Allocate space for ca */ 1197 ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1198 ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1199 1200 /* put together the new matrix */ 1201 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1202 1203 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1204 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 1205 c = (Mat_SeqAIJ *)((*C)->data); 1206 c->freedata = PETSC_TRUE; 1207 c->nonew = 0; 1208 1209 /* Clean up. */ 1210 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1211 1212 ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1213 PetscFunctionReturn(0); 1214 } 1215