1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 56*77431f27SBarry Smith if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78a61c8c0fSHong Zhang #undef __FUNCT__ 79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81a61c8c0fSHong Zhang { 82a61c8c0fSHong Zhang PetscErrorCode ierr; 8332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 84a61c8c0fSHong Zhang PetscObjectContainer container; 85a61c8c0fSHong Zhang 86a61c8c0fSHong Zhang PetscFunctionBegin; 87a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88a61c8c0fSHong Zhang if (container) { 89a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 9032fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 9132fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 9201b7ae99SHong Zhang ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 9301b7ae99SHong Zhang ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94a61c8c0fSHong Zhang 95a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 9632fba14fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97a61c8c0fSHong Zhang } 98a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 99a61c8c0fSHong Zhang 100a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101a61c8c0fSHong Zhang PetscFunctionReturn(0); 102a61c8c0fSHong Zhang } 103a61c8c0fSHong Zhang 104eb9c0419SKris Buschelman #undef __FUNCT__ 105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107ff134f7aSHong Zhang { 108ff134f7aSHong Zhang PetscErrorCode ierr; 10932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 110a61c8c0fSHong Zhang PetscObjectContainer container; 111b90dcfe3SHong Zhang 112b90dcfe3SHong Zhang PetscFunctionBegin; 113b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1144c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 11532fba14fSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 11632fba14fSHong Zhang 117e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 11801b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119a61c8c0fSHong Zhang 120e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 12132fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122e240928fSHong Zhang 123a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 124a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128e240928fSHong Zhang 12932fba14fSHong Zhang /* now, compute symbolic local P^T*A*P */ 130a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 133a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134a61c8c0fSHong Zhang if (container) { 135a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136a61c8c0fSHong Zhang } else { 137a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138a61c8c0fSHong Zhang } 139a61c8c0fSHong Zhang 140a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 14101b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142a61c8c0fSHong Zhang 143a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 14432fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 14532fba14fSHong Zhang 146a61c8c0fSHong Zhang } else { 147*77431f27SBarry Smith SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall); 148a61c8c0fSHong Zhang } 14932fba14fSHong Zhang /* now, compute numeric local P^T*A*P */ 150a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153a61c8c0fSHong Zhang 154a61c8c0fSHong Zhang PetscFunctionReturn(0); 155a61c8c0fSHong Zhang } 156a61c8c0fSHong Zhang 157a61c8c0fSHong Zhang #undef __FUNCT__ 158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160a61c8c0fSHong Zhang { 161a61c8c0fSHong Zhang PetscErrorCode ierr; 162a61c8c0fSHong Zhang Mat C_seq; 16332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 164a61c8c0fSHong Zhang PetscObjectContainer container; 165a61c8c0fSHong Zhang 166a61c8c0fSHong Zhang PetscFunctionBegin; 167a61c8c0fSHong Zhang 168a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169a61c8c0fSHong Zhang if (container) { 170a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171a61c8c0fSHong Zhang } else { 172a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173a61c8c0fSHong Zhang } 17425616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 17532fba14fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176a61c8c0fSHong Zhang 177b90dcfe3SHong Zhang /* add C_seq into mpi C */ 178a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179b90dcfe3SHong Zhang 180ff134f7aSHong Zhang PetscFunctionReturn(0); 181ff134f7aSHong Zhang } 182ff134f7aSHong Zhang 183ff134f7aSHong Zhang #undef __FUNCT__ 184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186ff134f7aSHong Zhang { 187b90dcfe3SHong Zhang PetscErrorCode ierr; 188671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 18932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 190a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 191ff134f7aSHong Zhang 192097ab81bSHong Zhang PetscInt flops=0; 19382afef89SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data; 194097ab81bSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 19582afef89SHong Zhang Mat_SeqAIJ *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 196097ab81bSHong Zhang Mat C_seq; 197e658270aSHong Zhang Mat_SeqAIJ *p_loc,*p_oth; 198097ab81bSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 199097ab81bSHong Zhang PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj; 20082afef89SHong Zhang PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 201097ab81bSHong Zhang PetscInt *cjj; 202097ab81bSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 203097ab81bSHong Zhang MatScalar *pa_loc,*pA,*pa_oth; 20482afef89SHong Zhang PetscInt am=A->m,cN=C->N,cm=C->m; 205097ab81bSHong Zhang 206097ab81bSHong Zhang MPI_Comm comm=C->comm; 207097ab81bSHong Zhang PetscMPIInt size,rank,taga,*len_s; 208097ab81bSHong Zhang PetscInt *owners; 209097ab81bSHong Zhang PetscInt proc; 210097ab81bSHong Zhang PetscInt **buf_ri,**buf_rj; 21182afef89SHong Zhang PetscInt cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 212097ab81bSHong Zhang PetscInt nrows,**buf_ri_k,**nextrow,**nextcseqi; 213097ab81bSHong Zhang MPI_Request *s_waits,*r_waits; 214097ab81bSHong Zhang MPI_Status *status; 215097ab81bSHong Zhang MatScalar **abuf_r,*ba_i; 216097ab81bSHong Zhang Mat_SeqAIJ *cseq; 217097ab81bSHong Zhang PetscInt *cseqi,*cseqj; 21882afef89SHong Zhang MatScalar *ca; 219097ab81bSHong Zhang 220ff134f7aSHong Zhang PetscFunctionBegin; 221a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 222a61c8c0fSHong Zhang if (cont_merge) { 223a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 2247f79fd58SMatthew Knepley } else { 225a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 226671beff6SHong Zhang } 227a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 228a61c8c0fSHong Zhang if (cont_ptap) { 229a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 230a61c8c0fSHong Zhang } else { 231a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 232a61c8c0fSHong Zhang } 233097ab81bSHong Zhang #ifdef OLD 23432fba14fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 235097ab81bSHong Zhang #endif /* OLD */ 236097ab81bSHong Zhang /*--------------------------------------------------------------*/ 237097ab81bSHong Zhang 238097ab81bSHong Zhang p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data; 239097ab81bSHong Zhang p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 240097ab81bSHong Zhang pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc; 241097ab81bSHong Zhang pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 242097ab81bSHong Zhang 243097ab81bSHong Zhang C_seq=merge->C_seq; 244097ab81bSHong Zhang cseq=(Mat_SeqAIJ*)C_seq->data; 245097ab81bSHong Zhang cseqi=cseq->i; cseqj=cseq->j; 246097ab81bSHong Zhang cseqa=cseq->a; 247097ab81bSHong Zhang 24882afef89SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 24982afef89SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 25082afef89SHong Zhang ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 25182afef89SHong Zhang 252097ab81bSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 253097ab81bSHong Zhang ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 254097ab81bSHong Zhang ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 255097ab81bSHong Zhang apj = (PetscInt *)(apa + cN); 256097ab81bSHong Zhang apjdense = apj + cN; 257097ab81bSHong Zhang 25882afef89SHong Zhang /* Allocate temporary MatScalar array for storage of one row of C */ 25982afef89SHong Zhang ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr); 26082afef89SHong Zhang ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr); 26182afef89SHong Zhang 26282afef89SHong Zhang /* Clear old values in C_Seq and C */ 263097ab81bSHong Zhang ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 26482afef89SHong Zhang ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 26582afef89SHong Zhang ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 266097ab81bSHong Zhang 267097ab81bSHong Zhang for (i=0;i<am;i++) { 268097ab81bSHong Zhang /* Form i-th sparse row of A*P */ 269097ab81bSHong Zhang apnzj = 0; 270097ab81bSHong Zhang /* diagonal portion of A */ 27182afef89SHong Zhang nzi = adi[i+1] - adi[i]; 27282afef89SHong Zhang for (j=0;j<nzi;j++) { 273097ab81bSHong Zhang prow = *adj; 274097ab81bSHong Zhang adj++; 275097ab81bSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 276097ab81bSHong Zhang pjj = pj_loc + pi_loc[prow]; 277097ab81bSHong Zhang paj = pa_loc + pi_loc[prow]; 278097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 279097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 280097ab81bSHong Zhang apjdense[pjj[k]] = -1; 281097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 282097ab81bSHong Zhang } 283097ab81bSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 284097ab81bSHong Zhang } 285097ab81bSHong Zhang flops += 2*pnzj; 286097ab81bSHong Zhang ada++; 287097ab81bSHong Zhang } 288097ab81bSHong Zhang /* off-diagonal portion of A */ 28982afef89SHong Zhang nzi = aoi[i+1] - aoi[i]; 29082afef89SHong Zhang for (j=0;j<nzi;j++) { 291097ab81bSHong Zhang col = a->garray[*aoj]; 292097ab81bSHong Zhang if (col < cstart){ 293097ab81bSHong Zhang prow = *aoj; 294097ab81bSHong Zhang } else if (col >= cend){ 295097ab81bSHong Zhang prow = *aoj; 296097ab81bSHong Zhang } else { 297097ab81bSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 298097ab81bSHong Zhang } 299097ab81bSHong Zhang aoj++; 300097ab81bSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 301097ab81bSHong Zhang pjj = pj_oth + pi_oth[prow]; 302097ab81bSHong Zhang paj = pa_oth + pi_oth[prow]; 303097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 304097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 305097ab81bSHong Zhang apjdense[pjj[k]] = -1; 306097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 307097ab81bSHong Zhang } 308097ab81bSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 309097ab81bSHong Zhang } 310097ab81bSHong Zhang flops += 2*pnzj; 311097ab81bSHong Zhang aoa++; 312097ab81bSHong Zhang } 313097ab81bSHong Zhang /* Sort the j index array for quick sparse axpy. */ 314097ab81bSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 315097ab81bSHong Zhang 316097ab81bSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 317097ab81bSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 318097ab81bSHong Zhang for (j=0;j<pnzi;j++) { 319097ab81bSHong Zhang crow = *pJ++; 320097ab81bSHong Zhang cjj = cseqj + cseqi[crow]; 321097ab81bSHong Zhang caj = cseqa + cseqi[crow]; 322097ab81bSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 32382afef89SHong Zhang if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */ 32482afef89SHong Zhang for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]]; 32582afef89SHong Zhang ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr); 32682afef89SHong Zhang ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr); 32782afef89SHong Zhang } else { /* add value into C_seq to be sent to other processors */ 32882afef89SHong Zhang nextap = 0; 329097ab81bSHong Zhang for (k=0;nextap<apnzj;k++) { 330097ab81bSHong Zhang if (cjj[k]==apj[nextap]) { 331097ab81bSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 332097ab81bSHong Zhang } 333097ab81bSHong Zhang } 33482afef89SHong Zhang } 335097ab81bSHong Zhang flops += 2*apnzj; 336097ab81bSHong Zhang pA++; 337097ab81bSHong Zhang } 338097ab81bSHong Zhang 339097ab81bSHong Zhang /* Zero the current row info for A*P */ 340097ab81bSHong Zhang for (j=0;j<apnzj;j++) { 341097ab81bSHong Zhang apa[apj[j]] = 0.; 342097ab81bSHong Zhang apjdense[apj[j]] = 0; 343097ab81bSHong Zhang } 344097ab81bSHong Zhang } 345097ab81bSHong Zhang 346097ab81bSHong Zhang /* Assemble the final matrix and clean up */ 347097ab81bSHong Zhang ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348097ab81bSHong Zhang ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349097ab81bSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 350097ab81bSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 351097ab81bSHong Zhang 352097ab81bSHong Zhang #ifdef OLD1 353a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 354097ab81bSHong Zhang #endif /* OLD1 */ 355097ab81bSHong Zhang /*--------------------------------------------------------------*/ 35682afef89SHong Zhang /* send and recv matrix values */ 35782afef89SHong Zhang /*-----------------------------*/ 358097ab81bSHong Zhang bi = merge->bi; 359097ab81bSHong Zhang bj = merge->bj; 360097ab81bSHong Zhang buf_ri = merge->buf_ri; 361097ab81bSHong Zhang buf_rj = merge->buf_rj; 362097ab81bSHong Zhang len_s = merge->len_s; 363097ab81bSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 364097ab81bSHong Zhang ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 365097ab81bSHong Zhang 366097ab81bSHong Zhang ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 367097ab81bSHong Zhang for (proc=0,k=0; proc<size; proc++){ 368097ab81bSHong Zhang if (!len_s[proc]) continue; 369097ab81bSHong Zhang i = owners[proc]; 370097ab81bSHong Zhang ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 371097ab81bSHong Zhang k++; 372097ab81bSHong Zhang } 37382afef89SHong Zhang ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 374097ab81bSHong Zhang ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 375097ab81bSHong Zhang ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 376097ab81bSHong Zhang ierr = PetscFree(status);CHKERRQ(ierr); 377097ab81bSHong Zhang 378097ab81bSHong Zhang ierr = PetscFree(s_waits);CHKERRQ(ierr); 379097ab81bSHong Zhang ierr = PetscFree(r_waits);CHKERRQ(ierr); 380097ab81bSHong Zhang 381097ab81bSHong Zhang /* insert mat values of mpimat */ 382097ab81bSHong Zhang /*----------------------------*/ 383097ab81bSHong Zhang ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 38482afef89SHong Zhang ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr); 385097ab81bSHong Zhang ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 386097ab81bSHong Zhang nextrow = buf_ri_k + merge->nrecv; 387097ab81bSHong Zhang nextcseqi = nextrow + merge->nrecv; 388097ab81bSHong Zhang 389097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ 390097ab81bSHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 391097ab81bSHong Zhang nrows = *(buf_ri_k[k]); 392097ab81bSHong Zhang nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 393097ab81bSHong Zhang nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 394097ab81bSHong Zhang } 395097ab81bSHong Zhang 39682afef89SHong Zhang /* add received local vals to C */ 39782afef89SHong Zhang for (i=0; i<cm; i++) { 39882afef89SHong Zhang crow = owners[rank] + i; 399097ab81bSHong Zhang bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 400097ab81bSHong Zhang bnzi = bi[i+1] - bi[i]; 40182afef89SHong Zhang nzi = 0; 402097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 403097ab81bSHong Zhang /* i-th row */ 404097ab81bSHong Zhang if (i == *nextrow[k]) { 405097ab81bSHong Zhang cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 406097ab81bSHong Zhang cseqj = buf_rj[k] + *(nextcseqi[k]); 407097ab81bSHong Zhang cseqa = abuf_r[k] + *(nextcseqi[k]); 408097ab81bSHong Zhang nextcseqj = 0; 409097ab81bSHong Zhang for (j=0; nextcseqj<cseqnzi; j++){ 410097ab81bSHong Zhang if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 411097ab81bSHong Zhang ba_i[j] += cseqa[nextcseqj++]; 41282afef89SHong Zhang nzi++; 413097ab81bSHong Zhang } 414097ab81bSHong Zhang } 415097ab81bSHong Zhang nextrow[k]++; nextcseqi[k]++; 416097ab81bSHong Zhang } 417097ab81bSHong Zhang } 41882afef89SHong Zhang if (nzi>0){ 41982afef89SHong Zhang ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr); 42082afef89SHong Zhang ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 42182afef89SHong Zhang } 422097ab81bSHong Zhang } 423097ab81bSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 424097ab81bSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 425097ab81bSHong Zhang 426097ab81bSHong Zhang ierr = PetscFree(abuf_r);CHKERRQ(ierr); 427097ab81bSHong Zhang ierr = PetscFree(ba_i);CHKERRQ(ierr); 428097ab81bSHong Zhang ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 42982afef89SHong Zhang ierr = PetscFree(ca);CHKERRQ(ierr); 430097ab81bSHong Zhang 431ff134f7aSHong Zhang PetscFunctionReturn(0); 432ff134f7aSHong Zhang } 433ff134f7aSHong Zhang 434ff134f7aSHong Zhang #undef __FUNCT__ 4359af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 436dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 4379af31e4aSHong Zhang { 438dfbe8321SBarry Smith PetscErrorCode ierr; 439b1d57f15SBarry Smith 4409af31e4aSHong Zhang PetscFunctionBegin; 4419af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 442d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4439af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 444d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4459af31e4aSHong Zhang } 446d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4479af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 448d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4499af31e4aSHong Zhang PetscFunctionReturn(0); 4509af31e4aSHong Zhang } 4519af31e4aSHong Zhang 4526849ba73SBarry Smith /* 4539af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 4544d3841fdSKris Buschelman 4554d3841fdSKris Buschelman Collective on Mat 4564d3841fdSKris Buschelman 4574d3841fdSKris Buschelman Input Parameters: 4584d3841fdSKris Buschelman + A - the matrix 4594d3841fdSKris Buschelman - P - the projection matrix 4604d3841fdSKris Buschelman 4614d3841fdSKris Buschelman Output Parameters: 4624d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 4634d3841fdSKris Buschelman 4644d3841fdSKris Buschelman Notes: 4654d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 4664d3841fdSKris Buschelman 4674d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 4684d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 4699af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 4704d3841fdSKris Buschelman 4714d3841fdSKris Buschelman Level: intermediate 4724d3841fdSKris Buschelman 4739af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 4746849ba73SBarry Smith */ 475e240928fSHong Zhang #undef __FUNCT__ 476e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 47755a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 47855a3bba9SHong Zhang { 479dfbe8321SBarry Smith PetscErrorCode ierr; 480534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 481534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 482eb9c0419SKris Buschelman 483eb9c0419SKris Buschelman PetscFunctionBegin; 484eb9c0419SKris Buschelman 4854482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 486c9780b6fSBarry Smith PetscValidType(A,1); 487eb9c0419SKris Buschelman MatPreallocated(A); 488eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 490eb9c0419SKris Buschelman 4914482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 492c9780b6fSBarry Smith PetscValidType(P,2); 493eb9c0419SKris Buschelman MatPreallocated(P); 494eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 495eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 496eb9c0419SKris Buschelman 4974482741eSBarry Smith PetscValidPointer(C,3); 4984482741eSBarry Smith 499*77431f27SBarry Smith if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 500*77431f27SBarry Smith if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 501eb9c0419SKris Buschelman 502534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 503534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 504534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 505534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 506534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 507534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 508534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5094d3841fdSKris Buschelman 510534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 511534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 512534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 513eb9c0419SKris Buschelman 514eb9c0419SKris Buschelman PetscFunctionReturn(0); 515eb9c0419SKris Buschelman } 516eb9c0419SKris Buschelman 517f747e1aeSHong Zhang typedef struct { 518f747e1aeSHong Zhang Mat symAP; 519f747e1aeSHong Zhang } Mat_PtAPstruct; 520f747e1aeSHong Zhang 52178a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 52278a80504SBarry Smith 523f747e1aeSHong Zhang #undef __FUNCT__ 524f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 525f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 526f747e1aeSHong Zhang { 527f4a850bbSBarry Smith PetscErrorCode ierr; 528f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 529f747e1aeSHong Zhang 530f747e1aeSHong Zhang PetscFunctionBegin; 531f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 532f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 53378a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 534f747e1aeSHong Zhang PetscFunctionReturn(0); 535f747e1aeSHong Zhang } 536f747e1aeSHong Zhang 537eb9c0419SKris Buschelman #undef __FUNCT__ 5389af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 53955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 54055a3bba9SHong Zhang { 541dfbe8321SBarry Smith PetscErrorCode ierr; 542d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 543d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 54455a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 545b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 54655a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 547b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 548d20bfe6fSHong Zhang MatScalar *ca; 54955a3bba9SHong Zhang PetscBT lnkbt; 550eb9c0419SKris Buschelman 551eb9c0419SKris Buschelman PetscFunctionBegin; 552d20bfe6fSHong Zhang /* Get ij structure of P^T */ 553eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 554d20bfe6fSHong Zhang ptJ=ptj; 555eb9c0419SKris Buschelman 556d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 557d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 55855a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 559d20bfe6fSHong Zhang ci[0] = 0; 560eb9c0419SKris Buschelman 56155a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 56255a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 563d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 56455a3bba9SHong Zhang 56555a3bba9SHong Zhang /* create and initialize a linked list */ 56655a3bba9SHong Zhang nlnk = pn+1; 56755a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 568eb9c0419SKris Buschelman 569d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 570d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 571d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 572d20bfe6fSHong Zhang current_space = free_space; 573d20bfe6fSHong Zhang 574d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 575d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 576d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 577d20bfe6fSHong Zhang ptanzi = 0; 578d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 579d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 580d20bfe6fSHong Zhang arow = *ptJ++; 581d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 582d20bfe6fSHong Zhang ajj = aj + ai[arow]; 583d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 584d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 585d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 586d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 587d20bfe6fSHong Zhang } 588d20bfe6fSHong Zhang } 589d20bfe6fSHong Zhang } 590d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 591d20bfe6fSHong Zhang ptaj = ptasparserow; 592d20bfe6fSHong Zhang cnzi = 0; 593d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 594d20bfe6fSHong Zhang prow = *ptaj++; 595d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 596d20bfe6fSHong Zhang pjj = pj + pi[prow]; 59755a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 59855a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 59955a3bba9SHong Zhang cnzi += nlnk; 600d20bfe6fSHong Zhang } 601d20bfe6fSHong Zhang 602d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 603d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 604d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 605d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 606d20bfe6fSHong Zhang } 607d20bfe6fSHong Zhang 608d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 60955a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 610d20bfe6fSHong Zhang current_space->array += cnzi; 611d20bfe6fSHong Zhang current_space->local_used += cnzi; 612d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 613d20bfe6fSHong Zhang 614d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 615d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 616d20bfe6fSHong Zhang } 617d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 618d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 619d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 620d20bfe6fSHong Zhang } 621d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 622d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 623d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 62455a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 625d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 626d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 62755a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 628d20bfe6fSHong Zhang 629d20bfe6fSHong Zhang /* Allocate space for ca */ 630d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 631d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 632d20bfe6fSHong Zhang 633d20bfe6fSHong Zhang /* put together the new matrix */ 634d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 635d20bfe6fSHong Zhang 636d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 637d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 638d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 639d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 640d20bfe6fSHong Zhang c->nonew = 0; 641d20bfe6fSHong Zhang 642d20bfe6fSHong Zhang /* Clean up. */ 643d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 644eb9c0419SKris Buschelman 645eb9c0419SKris Buschelman PetscFunctionReturn(0); 646eb9c0419SKris Buschelman } 647eb9c0419SKris Buschelman 6483985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 6493985e5eaSKris Buschelman EXTERN_C_BEGIN 6503985e5eaSKris Buschelman #undef __FUNCT__ 6519af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 65255a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 65355a3bba9SHong Zhang { 6545c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 655dfbe8321SBarry Smith PetscErrorCode ierr; 6563985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 6573985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 6583985e5eaSKris Buschelman Mat P=pp->AIJ; 6593985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 660b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 661b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 662b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 663b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 6643985e5eaSKris Buschelman MatScalar *ca; 6653985e5eaSKris Buschelman 6663985e5eaSKris Buschelman PetscFunctionBegin; 6673985e5eaSKris Buschelman /* Start timer */ 6689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 6693985e5eaSKris Buschelman 6703985e5eaSKris Buschelman /* Get ij structure of P^T */ 6713985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 6723985e5eaSKris Buschelman 6733985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6743985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 675b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 6763985e5eaSKris Buschelman ci[0] = 0; 6773985e5eaSKris Buschelman 678b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 679b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 6803985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 6813985e5eaSKris Buschelman denserow = ptasparserow + an; 6823985e5eaSKris Buschelman sparserow = denserow + pn; 6833985e5eaSKris Buschelman 6843985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 6853985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 6863985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 6873985e5eaSKris Buschelman current_space = free_space; 6883985e5eaSKris Buschelman 6893985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 6903985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 6913985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 6923985e5eaSKris Buschelman ptanzi = 0; 6933985e5eaSKris Buschelman ptJ = ptj + pti[i]; 6943985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 6953985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 6963985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 6973985e5eaSKris Buschelman arow = ptJ[j] + dof; 6983985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 6993985e5eaSKris Buschelman ajj = aj + ai[arow]; 7003985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 7013985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 7023985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 7033985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7043985e5eaSKris Buschelman } 7053985e5eaSKris Buschelman } 7063985e5eaSKris Buschelman } 7073985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7083985e5eaSKris Buschelman ptaj = ptasparserow; 7093985e5eaSKris Buschelman cnzi = 0; 7103985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 711fe05a634SKris Buschelman pdof = *ptaj%dof; 7123985e5eaSKris Buschelman prow = (*ptaj++)/dof; 7133985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 7143985e5eaSKris Buschelman pjj = pj + pi[prow]; 7153985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 716fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 717fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 718fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 7193985e5eaSKris Buschelman } 7203985e5eaSKris Buschelman } 7213985e5eaSKris Buschelman } 7223985e5eaSKris Buschelman 7233985e5eaSKris Buschelman /* sort sparserow */ 7243985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 7253985e5eaSKris Buschelman 7263985e5eaSKris Buschelman /* If free space is not available, make more free space */ 7273985e5eaSKris Buschelman /* Double the amount of total space in the list */ 7283985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 7293985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 7303985e5eaSKris Buschelman } 7313985e5eaSKris Buschelman 7323985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 733b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 7343985e5eaSKris Buschelman current_space->array += cnzi; 7353985e5eaSKris Buschelman current_space->local_used += cnzi; 7363985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 7373985e5eaSKris Buschelman 7383985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 7393985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 7403985e5eaSKris Buschelman } 7413985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 7423985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 7433985e5eaSKris Buschelman } 7443985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7453985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 7463985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 7473985e5eaSKris Buschelman } 7483985e5eaSKris Buschelman } 7493985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7503985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7513985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 752b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 7533985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 7543985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 7553985e5eaSKris Buschelman 7563985e5eaSKris Buschelman /* Allocate space for ca */ 7573985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 7583985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 7593985e5eaSKris Buschelman 7603985e5eaSKris Buschelman /* put together the new matrix */ 7613985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 7623985e5eaSKris Buschelman 7633985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7643985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7653985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 7663985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 7673985e5eaSKris Buschelman c->nonew = 0; 7683985e5eaSKris Buschelman 7693985e5eaSKris Buschelman /* Clean up. */ 7703985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 7713985e5eaSKris Buschelman 7729af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 7733985e5eaSKris Buschelman PetscFunctionReturn(0); 7743985e5eaSKris Buschelman } 7753985e5eaSKris Buschelman EXTERN_C_END 7763985e5eaSKris Buschelman 7776849ba73SBarry Smith /* 7789af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 7794d3841fdSKris Buschelman 7804d3841fdSKris Buschelman Collective on Mat 7814d3841fdSKris Buschelman 7824d3841fdSKris Buschelman Input Parameters: 7834d3841fdSKris Buschelman + A - the matrix 7844d3841fdSKris Buschelman - P - the projection matrix 7854d3841fdSKris Buschelman 7864d3841fdSKris Buschelman Output Parameters: 7874d3841fdSKris Buschelman . C - the product matrix 7884d3841fdSKris Buschelman 7894d3841fdSKris Buschelman Notes: 7909af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 7914d3841fdSKris Buschelman the user using MatDeatroy(). 7924d3841fdSKris Buschelman 793170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 794170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 7954d3841fdSKris Buschelman 7964d3841fdSKris Buschelman Level: intermediate 7974d3841fdSKris Buschelman 7989af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 7996849ba73SBarry Smith */ 800e240928fSHong Zhang #undef __FUNCT__ 801e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 80255a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 80355a3bba9SHong Zhang { 804dfbe8321SBarry Smith PetscErrorCode ierr; 805534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 806534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 807eb9c0419SKris Buschelman 808eb9c0419SKris Buschelman PetscFunctionBegin; 809eb9c0419SKris Buschelman 8104482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 811c9780b6fSBarry Smith PetscValidType(A,1); 812eb9c0419SKris Buschelman MatPreallocated(A); 813eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 814eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 815eb9c0419SKris Buschelman 8164482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 817c9780b6fSBarry Smith PetscValidType(P,2); 818eb9c0419SKris Buschelman MatPreallocated(P); 819eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 820eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 821eb9c0419SKris Buschelman 8224482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 823c9780b6fSBarry Smith PetscValidType(C,3); 824eb9c0419SKris Buschelman MatPreallocated(C); 825eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 826eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 827eb9c0419SKris Buschelman 828*77431f27SBarry Smith if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M); 829*77431f27SBarry Smith if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N); 830*77431f27SBarry Smith if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N); 831*77431f27SBarry Smith if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N); 832eb9c0419SKris Buschelman 833534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 834534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 835534c1384SKris Buschelman fA = A->ops->ptapnumeric; 836534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 837534c1384SKris Buschelman fP = P->ops->ptapnumeric; 838534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 839534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 8404d3841fdSKris Buschelman 841534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 842534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 843534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 844eb9c0419SKris Buschelman 845eb9c0419SKris Buschelman PetscFunctionReturn(0); 846eb9c0419SKris Buschelman } 847eb9c0419SKris Buschelman 848eb9c0419SKris Buschelman #undef __FUNCT__ 8499af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 850dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 851dfbe8321SBarry Smith { 852dfbe8321SBarry Smith PetscErrorCode ierr; 853b1d57f15SBarry Smith PetscInt flops=0; 854d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 855d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 856d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 857b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 858b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 859b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 860b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 861d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 862eb9c0419SKris Buschelman 863eb9c0419SKris Buschelman PetscFunctionBegin; 864d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 865b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 866b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 867eb9c0419SKris Buschelman 868b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 869d20bfe6fSHong Zhang apjdense = apj + cn; 870d20bfe6fSHong Zhang 871d20bfe6fSHong Zhang /* Clear old values in C */ 872d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 873d20bfe6fSHong Zhang 874d20bfe6fSHong Zhang for (i=0;i<am;i++) { 875d20bfe6fSHong Zhang /* Form sparse row of A*P */ 876d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 877d20bfe6fSHong Zhang apnzj = 0; 878d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 879d20bfe6fSHong Zhang prow = *aj++; 880d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 881d20bfe6fSHong Zhang pjj = pj + pi[prow]; 882d20bfe6fSHong Zhang paj = pa + pi[prow]; 883d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 884d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 885d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 886d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 887d20bfe6fSHong Zhang } 888d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 889d20bfe6fSHong Zhang } 890d20bfe6fSHong Zhang flops += 2*pnzj; 891d20bfe6fSHong Zhang aa++; 892d20bfe6fSHong Zhang } 893d20bfe6fSHong Zhang 894d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 895d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 896d20bfe6fSHong Zhang 897d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 898d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 899d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 900d20bfe6fSHong Zhang nextap = 0; 901d20bfe6fSHong Zhang crow = *pJ++; 902d20bfe6fSHong Zhang cjj = cj + ci[crow]; 903d20bfe6fSHong Zhang caj = ca + ci[crow]; 904d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 905d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 906d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 907d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 908d20bfe6fSHong Zhang } 909d20bfe6fSHong Zhang } 910d20bfe6fSHong Zhang flops += 2*apnzj; 911d20bfe6fSHong Zhang pA++; 912d20bfe6fSHong Zhang } 913d20bfe6fSHong Zhang 914d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 915d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 916d20bfe6fSHong Zhang apa[apj[j]] = 0.; 917d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 918d20bfe6fSHong Zhang } 919d20bfe6fSHong Zhang } 920d20bfe6fSHong Zhang 921d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 922d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 923d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 925d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 926d20bfe6fSHong Zhang 927eb9c0419SKris Buschelman PetscFunctionReturn(0); 928eb9c0419SKris Buschelman } 9290e36024fSHong Zhang 930a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 931e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 932e240928fSHong Zhang #undef __FUNCT__ 933e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 934e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 935e240928fSHong Zhang { 936e240928fSHong Zhang PetscErrorCode ierr; 937e240928fSHong Zhang PetscInt flops=0; 938e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 939e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 940e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 941e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 942e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 943e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 944e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 945e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 946e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 947e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 948e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 949e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 950e240928fSHong Zhang 951e240928fSHong Zhang PetscFunctionBegin; 952e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 953a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 954e240928fSHong Zhang } 955a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 956e240928fSHong Zhang 957e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 958e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 959e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 960e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 961e240928fSHong Zhang apjdense = apj + cn; 962e240928fSHong Zhang 963e240928fSHong Zhang /* Clear old values in C */ 964e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 965e240928fSHong Zhang 966e240928fSHong Zhang for (i=0;i<am;i++) { 967e240928fSHong Zhang /* Form i-th sparse row of A*P */ 968e240928fSHong Zhang apnzj = 0; 969e240928fSHong Zhang /* diagonal portion of A */ 970e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 971e240928fSHong Zhang for (j=0;j<anzi;j++) { 972e240928fSHong Zhang prow = *adj; 973e240928fSHong Zhang adj++; 974e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 975e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 976e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 977e240928fSHong Zhang for (k=0;k<pnzj;k++) { 978e240928fSHong Zhang if (!apjdense[pjj[k]]) { 979e240928fSHong Zhang apjdense[pjj[k]] = -1; 980e240928fSHong Zhang apj[apnzj++] = pjj[k]; 981e240928fSHong Zhang } 982e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 983e240928fSHong Zhang } 984e240928fSHong Zhang flops += 2*pnzj; 985e240928fSHong Zhang ada++; 986e240928fSHong Zhang } 987e240928fSHong Zhang /* off-diagonal portion of A */ 988e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 989e240928fSHong Zhang for (j=0;j<anzi;j++) { 990e240928fSHong Zhang col = a->garray[*aoj]; 991e240928fSHong Zhang if (col < cstart){ 992e240928fSHong Zhang prow = *aoj; 993e240928fSHong Zhang } else if (col >= cend){ 994e240928fSHong Zhang prow = *aoj; 995e240928fSHong Zhang } else { 996e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 997e240928fSHong Zhang } 998e240928fSHong Zhang aoj++; 999e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1000e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1001e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 1002e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1003e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1004e240928fSHong Zhang apjdense[pjj[k]] = -1; 1005e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1006e240928fSHong Zhang } 1007e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 1008e240928fSHong Zhang } 1009e240928fSHong Zhang flops += 2*pnzj; 1010e240928fSHong Zhang aoa++; 1011e240928fSHong Zhang } 1012e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 1013e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1014e240928fSHong Zhang 1015e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1016e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 1017e240928fSHong Zhang for (j=0;j<pnzi;j++) { 1018e240928fSHong Zhang nextap = 0; 1019e240928fSHong Zhang crow = *pJ++; 1020e240928fSHong Zhang cjj = cj + ci[crow]; 1021e240928fSHong Zhang caj = ca + ci[crow]; 1022e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 1023e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 1024e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 1025e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 1026e240928fSHong Zhang } 1027e240928fSHong Zhang } 1028e240928fSHong Zhang flops += 2*apnzj; 1029e240928fSHong Zhang pA++; 1030e240928fSHong Zhang } 1031e240928fSHong Zhang 1032e240928fSHong Zhang /* Zero the current row info for A*P */ 1033e240928fSHong Zhang for (j=0;j<apnzj;j++) { 1034e240928fSHong Zhang apa[apj[j]] = 0.; 1035e240928fSHong Zhang apjdense[apj[j]] = 0; 1036e240928fSHong Zhang } 1037e240928fSHong Zhang } 1038e240928fSHong Zhang 1039e240928fSHong Zhang /* Assemble the final matrix and clean up */ 1040e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 1043e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1044a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1045e240928fSHong Zhang PetscFunctionReturn(0); 1046e240928fSHong Zhang } 1047e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 1048e240928fSHong Zhang #undef __FUNCT__ 1049e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1050e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1051e240928fSHong Zhang { 1052e240928fSHong Zhang PetscErrorCode ierr; 1053e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1054e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1055e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1056e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1057e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1058e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 10596abd8857SHong Zhang PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1060e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1061e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 1062e240928fSHong Zhang MatScalar *ca; 1063e240928fSHong Zhang PetscBT lnkbt; 1064e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 1065e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1066e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1067e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1068e240928fSHong Zhang 1069e240928fSHong Zhang PetscFunctionBegin; 1070e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 1071e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1072e240928fSHong Zhang } 1073e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1074e240928fSHong Zhang 1075e240928fSHong Zhang prend = prstart + pm; 1076e240928fSHong Zhang 1077e240928fSHong Zhang /* get ij structure of P_loc^T */ 1078e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1079e240928fSHong Zhang ptJ=ptj; 1080e240928fSHong Zhang 1081e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 1082e240928fSHong Zhang /* free space for accumulating nonzero column info */ 1083e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1084e240928fSHong Zhang ci[0] = 0; 1085e240928fSHong Zhang 10866abd8857SHong Zhang ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 10876abd8857SHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 10886abd8857SHong Zhang ptasparserow_loc = ptadenserow_loc + aN; 10896abd8857SHong Zhang ptadenserow_oth = ptasparserow_loc + aN; 10906abd8857SHong Zhang ptasparserow_oth = ptadenserow_oth + aN; 1091e240928fSHong Zhang 1092e240928fSHong Zhang /* create and initialize a linked list */ 1093e240928fSHong Zhang nlnk = pN+1; 1094e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1095e240928fSHong Zhang 10966abd8857SHong Zhang /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1097e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1098e240928fSHong Zhang nnz = adi[am] + aoi[am]; 10996abd8857SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1100e240928fSHong Zhang current_space = free_space; 1101e240928fSHong Zhang 1102e240928fSHong Zhang /* determine symbolic info for each row of C: */ 1103e240928fSHong Zhang for (i=0;i<pN;i++) { 1104e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 1105e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 1106e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 1107e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 1108e240928fSHong Zhang arow = *ptJ++; 1109e240928fSHong Zhang /* diagonal portion of A */ 1110e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 1111e240928fSHong Zhang ajj = adj + adi[arow]; 1112e240928fSHong Zhang for (k=0;k<anzj;k++) { 1113e240928fSHong Zhang col = ajj[k]+prstart; 1114e240928fSHong Zhang if (!ptadenserow_loc[col]) { 1115e240928fSHong Zhang ptadenserow_loc[col] = -1; 1116e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 1117e240928fSHong Zhang } 1118e240928fSHong Zhang } 1119e240928fSHong Zhang /* off-diagonal portion of A */ 1120e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 1121e240928fSHong Zhang ajj = aoj + aoi[arow]; 1122e240928fSHong Zhang for (k=0;k<anzj;k++) { 1123e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 1124e240928fSHong Zhang if (col < cstart){ 1125e240928fSHong Zhang col = ajj[k]; 1126e240928fSHong Zhang } else if (col >= cend){ 1127e240928fSHong Zhang col = ajj[k] + pm; 1128e240928fSHong Zhang } else { 1129e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1130e240928fSHong Zhang } 1131e240928fSHong Zhang if (!ptadenserow_oth[col]) { 1132e240928fSHong Zhang ptadenserow_oth[col] = -1; 1133e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 1134e240928fSHong Zhang } 1135e240928fSHong Zhang } 1136e240928fSHong Zhang } 1137e240928fSHong Zhang 1138e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1139e240928fSHong Zhang cnzi = 0; 1140e240928fSHong Zhang /* rows of P_loc */ 1141e240928fSHong Zhang ptaj = ptasparserow_loc; 1142e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 1143e240928fSHong Zhang prow = *ptaj++; 1144e240928fSHong Zhang prow -= prstart; /* rm */ 1145e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1146e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1147e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1148e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1149e240928fSHong Zhang cnzi += nlnk; 1150e240928fSHong Zhang } 1151e240928fSHong Zhang /* rows of P_oth */ 1152e240928fSHong Zhang ptaj = ptasparserow_oth; 1153e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 1154e240928fSHong Zhang prow = *ptaj++; 1155e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 1156e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1157e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1158e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1159e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1160e240928fSHong Zhang cnzi += nlnk; 1161e240928fSHong Zhang } 1162e240928fSHong Zhang 1163e240928fSHong Zhang /* If free space is not available, make more free space */ 1164e240928fSHong Zhang /* Double the amount of total space in the list */ 1165e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 1166e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1167e240928fSHong Zhang } 1168e240928fSHong Zhang 1169e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 1170e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1171e240928fSHong Zhang current_space->array += cnzi; 1172e240928fSHong Zhang current_space->local_used += cnzi; 1173e240928fSHong Zhang current_space->local_remaining -= cnzi; 1174e240928fSHong Zhang 1175e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 1176e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 1177e240928fSHong Zhang } 1178e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 1179e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 1180e240928fSHong Zhang } 1181e240928fSHong Zhang 1182e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1183e240928fSHong Zhang /* For now, we will recompute what is needed. */ 1184e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 1185e240928fSHong Zhang } 1186e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1187e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 1188e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 1189e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1190e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1191e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1192e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1193e240928fSHong Zhang 1194e240928fSHong Zhang /* Allocate space for ca */ 1195e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1196e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1197e240928fSHong Zhang 1198e240928fSHong Zhang /* put together the new matrix */ 1199e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1200e240928fSHong Zhang 1201e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1202e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1203e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 1204e240928fSHong Zhang c->freedata = PETSC_TRUE; 1205e240928fSHong Zhang c->nonew = 0; 1206e240928fSHong Zhang 1207e240928fSHong Zhang /* Clean up. */ 1208e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1209a61c8c0fSHong Zhang 1210e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1211e240928fSHong Zhang PetscFunctionReturn(0); 1212e240928fSHong Zhang } 1213