1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78a61c8c0fSHong Zhang #undef __FUNCT__ 79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81a61c8c0fSHong Zhang { 82a61c8c0fSHong Zhang PetscErrorCode ierr; 8332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 84a61c8c0fSHong Zhang PetscObjectContainer container; 85a61c8c0fSHong Zhang 86a61c8c0fSHong Zhang PetscFunctionBegin; 87a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88a61c8c0fSHong Zhang if (container) { 89a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 9032fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 9132fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 9201b7ae99SHong Zhang ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 9301b7ae99SHong Zhang ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94a61c8c0fSHong Zhang 95a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 9632fba14fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97a61c8c0fSHong Zhang } 98a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 99a61c8c0fSHong Zhang 100a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101a61c8c0fSHong Zhang PetscFunctionReturn(0); 102a61c8c0fSHong Zhang } 103a61c8c0fSHong Zhang 104eb9c0419SKris Buschelman #undef __FUNCT__ 105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107ff134f7aSHong Zhang { 108ff134f7aSHong Zhang PetscErrorCode ierr; 10932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 110a61c8c0fSHong Zhang PetscObjectContainer container; 111b90dcfe3SHong Zhang 112b90dcfe3SHong Zhang PetscFunctionBegin; 113b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1144c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 11532fba14fSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 11632fba14fSHong Zhang 117e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 11801b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119a61c8c0fSHong Zhang 120e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 12132fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122e240928fSHong Zhang 123a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 124a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128e240928fSHong Zhang 12932fba14fSHong Zhang /* now, compute symbolic local P^T*A*P */ 130a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 133a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134a61c8c0fSHong Zhang if (container) { 135a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136a61c8c0fSHong Zhang } else { 137a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138a61c8c0fSHong Zhang } 139a61c8c0fSHong Zhang 140a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 14101b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142a61c8c0fSHong Zhang 143a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 14432fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 14532fba14fSHong Zhang 146a61c8c0fSHong Zhang } else { 147a61c8c0fSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148a61c8c0fSHong Zhang } 14932fba14fSHong Zhang /* now, compute numeric local P^T*A*P */ 150a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153a61c8c0fSHong Zhang 154a61c8c0fSHong Zhang PetscFunctionReturn(0); 155a61c8c0fSHong Zhang } 156a61c8c0fSHong Zhang 157a61c8c0fSHong Zhang #undef __FUNCT__ 158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160a61c8c0fSHong Zhang { 161a61c8c0fSHong Zhang PetscErrorCode ierr; 162a61c8c0fSHong Zhang Mat C_seq; 16332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 164a61c8c0fSHong Zhang PetscObjectContainer container; 165a61c8c0fSHong Zhang 166a61c8c0fSHong Zhang PetscFunctionBegin; 167a61c8c0fSHong Zhang 168a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169a61c8c0fSHong Zhang if (container) { 170a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171a61c8c0fSHong Zhang } else { 172a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173a61c8c0fSHong Zhang } 17425616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 17532fba14fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176a61c8c0fSHong Zhang 177b90dcfe3SHong Zhang /* add C_seq into mpi C */ 178a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179b90dcfe3SHong Zhang 180ff134f7aSHong Zhang PetscFunctionReturn(0); 181ff134f7aSHong Zhang } 182ff134f7aSHong Zhang 183ff134f7aSHong Zhang #undef __FUNCT__ 184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186ff134f7aSHong Zhang { 187b90dcfe3SHong Zhang PetscErrorCode ierr; 188671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 18932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 190a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 191ff134f7aSHong Zhang 192097ab81bSHong Zhang PetscInt flops=0; 193*82afef89SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data; 194097ab81bSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 195*82afef89SHong Zhang Mat_SeqAIJ *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; 196097ab81bSHong Zhang Mat C_seq; 197e658270aSHong Zhang Mat_SeqAIJ *p_loc,*p_oth; 198097ab81bSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 199097ab81bSHong Zhang PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj; 200*82afef89SHong Zhang PetscInt i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow; 201097ab81bSHong Zhang PetscInt *cjj; 202097ab81bSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 203097ab81bSHong Zhang MatScalar *pa_loc,*pA,*pa_oth; 204*82afef89SHong Zhang PetscInt am=A->m,cN=C->N,cm=C->m; 205097ab81bSHong Zhang 206097ab81bSHong Zhang MPI_Comm comm=C->comm; 207097ab81bSHong Zhang PetscMPIInt size,rank,taga,*len_s; 208097ab81bSHong Zhang PetscInt *owners; 209097ab81bSHong Zhang PetscInt proc; 210097ab81bSHong Zhang PetscInt **buf_ri,**buf_rj; 211*82afef89SHong Zhang PetscInt cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 212097ab81bSHong Zhang PetscInt nrows,**buf_ri_k,**nextrow,**nextcseqi; 213097ab81bSHong Zhang MPI_Request *s_waits,*r_waits; 214097ab81bSHong Zhang MPI_Status *status; 215097ab81bSHong Zhang MatScalar **abuf_r,*ba_i; 216097ab81bSHong Zhang Mat_SeqAIJ *cseq; 217097ab81bSHong Zhang PetscInt *cseqi,*cseqj; 218*82afef89SHong Zhang MatScalar *ca; 219097ab81bSHong Zhang 220ff134f7aSHong Zhang PetscFunctionBegin; 221a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 222a61c8c0fSHong Zhang if (cont_merge) { 223a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 2247f79fd58SMatthew Knepley } else { 225a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 226671beff6SHong Zhang } 227a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 228a61c8c0fSHong Zhang if (cont_ptap) { 229a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 230a61c8c0fSHong Zhang } else { 231a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 232a61c8c0fSHong Zhang } 233097ab81bSHong Zhang #ifdef OLD 23432fba14fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 235097ab81bSHong Zhang #endif /* OLD */ 236097ab81bSHong Zhang /*--------------------------------------------------------------*/ 237097ab81bSHong Zhang 238097ab81bSHong Zhang p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data; 239097ab81bSHong Zhang p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 240097ab81bSHong Zhang pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc; 241097ab81bSHong Zhang pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 242097ab81bSHong Zhang 243097ab81bSHong Zhang C_seq=merge->C_seq; 244097ab81bSHong Zhang cseq=(Mat_SeqAIJ*)C_seq->data; 245097ab81bSHong Zhang cseqi=cseq->i; cseqj=cseq->j; 246097ab81bSHong Zhang cseqa=cseq->a; 247097ab81bSHong Zhang 248*82afef89SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 249*82afef89SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 250*82afef89SHong Zhang ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 251*82afef89SHong Zhang 252097ab81bSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 253097ab81bSHong Zhang ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 254097ab81bSHong Zhang ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 255097ab81bSHong Zhang apj = (PetscInt *)(apa + cN); 256097ab81bSHong Zhang apjdense = apj + cN; 257097ab81bSHong Zhang 258*82afef89SHong Zhang /* Allocate temporary MatScalar array for storage of one row of C */ 259*82afef89SHong Zhang ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr); 260*82afef89SHong Zhang ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr); 261*82afef89SHong Zhang 262*82afef89SHong Zhang /* Clear old values in C_Seq and C */ 263097ab81bSHong Zhang ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 264*82afef89SHong Zhang ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 265*82afef89SHong Zhang ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr); 266097ab81bSHong Zhang 267097ab81bSHong Zhang for (i=0;i<am;i++) { 268097ab81bSHong Zhang /* Form i-th sparse row of A*P */ 269097ab81bSHong Zhang apnzj = 0; 270097ab81bSHong Zhang /* diagonal portion of A */ 271*82afef89SHong Zhang nzi = adi[i+1] - adi[i]; 272*82afef89SHong Zhang for (j=0;j<nzi;j++) { 273097ab81bSHong Zhang prow = *adj; 274097ab81bSHong Zhang adj++; 275097ab81bSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 276097ab81bSHong Zhang pjj = pj_loc + pi_loc[prow]; 277097ab81bSHong Zhang paj = pa_loc + pi_loc[prow]; 278097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 279097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 280097ab81bSHong Zhang apjdense[pjj[k]] = -1; 281097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 282097ab81bSHong Zhang } 283097ab81bSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 284097ab81bSHong Zhang } 285097ab81bSHong Zhang flops += 2*pnzj; 286097ab81bSHong Zhang ada++; 287097ab81bSHong Zhang } 288097ab81bSHong Zhang /* off-diagonal portion of A */ 289*82afef89SHong Zhang nzi = aoi[i+1] - aoi[i]; 290*82afef89SHong Zhang for (j=0;j<nzi;j++) { 291097ab81bSHong Zhang col = a->garray[*aoj]; 292097ab81bSHong Zhang if (col < cstart){ 293097ab81bSHong Zhang prow = *aoj; 294097ab81bSHong Zhang } else if (col >= cend){ 295097ab81bSHong Zhang prow = *aoj; 296097ab81bSHong Zhang } else { 297097ab81bSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 298097ab81bSHong Zhang } 299097ab81bSHong Zhang aoj++; 300097ab81bSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 301097ab81bSHong Zhang pjj = pj_oth + pi_oth[prow]; 302097ab81bSHong Zhang paj = pa_oth + pi_oth[prow]; 303097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 304097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 305097ab81bSHong Zhang apjdense[pjj[k]] = -1; 306097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 307097ab81bSHong Zhang } 308097ab81bSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 309097ab81bSHong Zhang } 310097ab81bSHong Zhang flops += 2*pnzj; 311097ab81bSHong Zhang aoa++; 312097ab81bSHong Zhang } 313097ab81bSHong Zhang /* Sort the j index array for quick sparse axpy. */ 314097ab81bSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 315097ab81bSHong Zhang 316097ab81bSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 317097ab81bSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 318097ab81bSHong Zhang for (j=0;j<pnzi;j++) { 319097ab81bSHong Zhang crow = *pJ++; 320097ab81bSHong Zhang cjj = cseqj + cseqi[crow]; 321097ab81bSHong Zhang caj = cseqa + cseqi[crow]; 322097ab81bSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 323*82afef89SHong Zhang if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */ 324*82afef89SHong Zhang for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]]; 325*82afef89SHong Zhang ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr); 326*82afef89SHong Zhang ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr); 327*82afef89SHong Zhang } else { /* add value into C_seq to be sent to other processors */ 328*82afef89SHong Zhang nextap = 0; 329097ab81bSHong Zhang for (k=0;nextap<apnzj;k++) { 330097ab81bSHong Zhang if (cjj[k]==apj[nextap]) { 331097ab81bSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 332097ab81bSHong Zhang } 333097ab81bSHong Zhang } 334*82afef89SHong Zhang } 335097ab81bSHong Zhang flops += 2*apnzj; 336097ab81bSHong Zhang pA++; 337097ab81bSHong Zhang } 338097ab81bSHong Zhang 339097ab81bSHong Zhang /* Zero the current row info for A*P */ 340097ab81bSHong Zhang for (j=0;j<apnzj;j++) { 341097ab81bSHong Zhang apa[apj[j]] = 0.; 342097ab81bSHong Zhang apjdense[apj[j]] = 0; 343097ab81bSHong Zhang } 344097ab81bSHong Zhang } 345097ab81bSHong Zhang 346097ab81bSHong Zhang /* Assemble the final matrix and clean up */ 347097ab81bSHong Zhang ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348097ab81bSHong Zhang ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349097ab81bSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 350097ab81bSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 351097ab81bSHong Zhang 352097ab81bSHong Zhang #ifdef OLD1 353a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 354097ab81bSHong Zhang #endif /* OLD1 */ 355097ab81bSHong Zhang /*--------------------------------------------------------------*/ 356*82afef89SHong Zhang /* send and recv matrix values */ 357*82afef89SHong Zhang /*-----------------------------*/ 358097ab81bSHong Zhang bi = merge->bi; 359097ab81bSHong Zhang bj = merge->bj; 360097ab81bSHong Zhang buf_ri = merge->buf_ri; 361097ab81bSHong Zhang buf_rj = merge->buf_rj; 362097ab81bSHong Zhang len_s = merge->len_s; 363097ab81bSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 364097ab81bSHong Zhang ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 365097ab81bSHong Zhang 366097ab81bSHong Zhang ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 367097ab81bSHong Zhang for (proc=0,k=0; proc<size; proc++){ 368097ab81bSHong Zhang if (!len_s[proc]) continue; 369097ab81bSHong Zhang i = owners[proc]; 370097ab81bSHong Zhang ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 371097ab81bSHong Zhang k++; 372097ab81bSHong Zhang } 373*82afef89SHong Zhang ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 374097ab81bSHong Zhang ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 375097ab81bSHong Zhang ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 376097ab81bSHong Zhang ierr = PetscFree(status);CHKERRQ(ierr); 377097ab81bSHong Zhang 378097ab81bSHong Zhang ierr = PetscFree(s_waits);CHKERRQ(ierr); 379097ab81bSHong Zhang ierr = PetscFree(r_waits);CHKERRQ(ierr); 380097ab81bSHong Zhang 381097ab81bSHong Zhang /* insert mat values of mpimat */ 382097ab81bSHong Zhang /*----------------------------*/ 383097ab81bSHong Zhang ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 384*82afef89SHong Zhang ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr); 385097ab81bSHong Zhang ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 386097ab81bSHong Zhang nextrow = buf_ri_k + merge->nrecv; 387097ab81bSHong Zhang nextcseqi = nextrow + merge->nrecv; 388097ab81bSHong Zhang 389097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ 390097ab81bSHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 391097ab81bSHong Zhang nrows = *(buf_ri_k[k]); 392097ab81bSHong Zhang nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 393097ab81bSHong Zhang nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 394097ab81bSHong Zhang } 395097ab81bSHong Zhang 396*82afef89SHong Zhang /* add received local vals to C */ 397*82afef89SHong Zhang for (i=0; i<cm; i++) { 398*82afef89SHong Zhang crow = owners[rank] + i; 399097ab81bSHong Zhang bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 400097ab81bSHong Zhang bnzi = bi[i+1] - bi[i]; 401*82afef89SHong Zhang nzi = 0; 402097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 403097ab81bSHong Zhang /* i-th row */ 404097ab81bSHong Zhang if (i == *nextrow[k]) { 405097ab81bSHong Zhang cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 406097ab81bSHong Zhang cseqj = buf_rj[k] + *(nextcseqi[k]); 407097ab81bSHong Zhang cseqa = abuf_r[k] + *(nextcseqi[k]); 408097ab81bSHong Zhang nextcseqj = 0; 409097ab81bSHong Zhang for (j=0; nextcseqj<cseqnzi; j++){ 410097ab81bSHong Zhang if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 411097ab81bSHong Zhang ba_i[j] += cseqa[nextcseqj++]; 412*82afef89SHong Zhang nzi++; 413097ab81bSHong Zhang } 414097ab81bSHong Zhang } 415097ab81bSHong Zhang nextrow[k]++; nextcseqi[k]++; 416097ab81bSHong Zhang } 417097ab81bSHong Zhang } 418*82afef89SHong Zhang if (nzi>0){ 419*82afef89SHong Zhang /* if (rank==1) printf(" [%d] set %d values on C, row: %d\n",rank,bnzi,crow); */ 420*82afef89SHong Zhang ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr); 421*82afef89SHong Zhang ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 422*82afef89SHong Zhang } 423097ab81bSHong Zhang } 424097ab81bSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 425097ab81bSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 426097ab81bSHong Zhang 427097ab81bSHong Zhang ierr = PetscFree(abuf_r);CHKERRQ(ierr); 428097ab81bSHong Zhang ierr = PetscFree(ba_i);CHKERRQ(ierr); 429097ab81bSHong Zhang ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 430*82afef89SHong Zhang ierr = PetscFree(ca);CHKERRQ(ierr); 431097ab81bSHong Zhang 432ff134f7aSHong Zhang PetscFunctionReturn(0); 433ff134f7aSHong Zhang } 434ff134f7aSHong Zhang 435ff134f7aSHong Zhang #undef __FUNCT__ 4369af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 437dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 4389af31e4aSHong Zhang { 439dfbe8321SBarry Smith PetscErrorCode ierr; 440b1d57f15SBarry Smith 4419af31e4aSHong Zhang PetscFunctionBegin; 4429af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 443d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4449af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 445d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4469af31e4aSHong Zhang } 447d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4489af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 449d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4509af31e4aSHong Zhang PetscFunctionReturn(0); 4519af31e4aSHong Zhang } 4529af31e4aSHong Zhang 4536849ba73SBarry Smith /* 4549af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 4554d3841fdSKris Buschelman 4564d3841fdSKris Buschelman Collective on Mat 4574d3841fdSKris Buschelman 4584d3841fdSKris Buschelman Input Parameters: 4594d3841fdSKris Buschelman + A - the matrix 4604d3841fdSKris Buschelman - P - the projection matrix 4614d3841fdSKris Buschelman 4624d3841fdSKris Buschelman Output Parameters: 4634d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 4644d3841fdSKris Buschelman 4654d3841fdSKris Buschelman Notes: 4664d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 4674d3841fdSKris Buschelman 4684d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 4694d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 4709af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 4714d3841fdSKris Buschelman 4724d3841fdSKris Buschelman Level: intermediate 4734d3841fdSKris Buschelman 4749af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 4756849ba73SBarry Smith */ 476e240928fSHong Zhang #undef __FUNCT__ 477e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 47855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 47955a3bba9SHong Zhang { 480dfbe8321SBarry Smith PetscErrorCode ierr; 481534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 482534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 483eb9c0419SKris Buschelman 484eb9c0419SKris Buschelman PetscFunctionBegin; 485eb9c0419SKris Buschelman 4864482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 487c9780b6fSBarry Smith PetscValidType(A,1); 488eb9c0419SKris Buschelman MatPreallocated(A); 489eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 490eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 491eb9c0419SKris Buschelman 4924482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 493c9780b6fSBarry Smith PetscValidType(P,2); 494eb9c0419SKris Buschelman MatPreallocated(P); 495eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 496eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 497eb9c0419SKris Buschelman 4984482741eSBarry Smith PetscValidPointer(C,3); 4994482741eSBarry Smith 500eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 501eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 502eb9c0419SKris Buschelman 503534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 504534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 505534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 506534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 507534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 508534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 509534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5104d3841fdSKris Buschelman 511534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 512534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 513534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 514eb9c0419SKris Buschelman 515eb9c0419SKris Buschelman PetscFunctionReturn(0); 516eb9c0419SKris Buschelman } 517eb9c0419SKris Buschelman 518f747e1aeSHong Zhang typedef struct { 519f747e1aeSHong Zhang Mat symAP; 520f747e1aeSHong Zhang } Mat_PtAPstruct; 521f747e1aeSHong Zhang 52278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 52378a80504SBarry Smith 524f747e1aeSHong Zhang #undef __FUNCT__ 525f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 526f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 527f747e1aeSHong Zhang { 528f4a850bbSBarry Smith PetscErrorCode ierr; 529f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 530f747e1aeSHong Zhang 531f747e1aeSHong Zhang PetscFunctionBegin; 532f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 533f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 53478a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 535f747e1aeSHong Zhang PetscFunctionReturn(0); 536f747e1aeSHong Zhang } 537f747e1aeSHong Zhang 538eb9c0419SKris Buschelman #undef __FUNCT__ 5399af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 54055a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 54155a3bba9SHong Zhang { 542dfbe8321SBarry Smith PetscErrorCode ierr; 543d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 544d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 54555a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 546b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 54755a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 548b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 549d20bfe6fSHong Zhang MatScalar *ca; 55055a3bba9SHong Zhang PetscBT lnkbt; 551eb9c0419SKris Buschelman 552eb9c0419SKris Buschelman PetscFunctionBegin; 553d20bfe6fSHong Zhang /* Get ij structure of P^T */ 554eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 555d20bfe6fSHong Zhang ptJ=ptj; 556eb9c0419SKris Buschelman 557d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 558d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 55955a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 560d20bfe6fSHong Zhang ci[0] = 0; 561eb9c0419SKris Buschelman 56255a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 56355a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 564d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 56555a3bba9SHong Zhang 56655a3bba9SHong Zhang /* create and initialize a linked list */ 56755a3bba9SHong Zhang nlnk = pn+1; 56855a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 569eb9c0419SKris Buschelman 570d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 571d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 572d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 573d20bfe6fSHong Zhang current_space = free_space; 574d20bfe6fSHong Zhang 575d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 576d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 577d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 578d20bfe6fSHong Zhang ptanzi = 0; 579d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 580d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 581d20bfe6fSHong Zhang arow = *ptJ++; 582d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 583d20bfe6fSHong Zhang ajj = aj + ai[arow]; 584d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 585d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 586d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 587d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 588d20bfe6fSHong Zhang } 589d20bfe6fSHong Zhang } 590d20bfe6fSHong Zhang } 591d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 592d20bfe6fSHong Zhang ptaj = ptasparserow; 593d20bfe6fSHong Zhang cnzi = 0; 594d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 595d20bfe6fSHong Zhang prow = *ptaj++; 596d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 597d20bfe6fSHong Zhang pjj = pj + pi[prow]; 59855a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 59955a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 60055a3bba9SHong Zhang cnzi += nlnk; 601d20bfe6fSHong Zhang } 602d20bfe6fSHong Zhang 603d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 604d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 605d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 606d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 607d20bfe6fSHong Zhang } 608d20bfe6fSHong Zhang 609d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 61055a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 611d20bfe6fSHong Zhang current_space->array += cnzi; 612d20bfe6fSHong Zhang current_space->local_used += cnzi; 613d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 614d20bfe6fSHong Zhang 615d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 616d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 617d20bfe6fSHong Zhang } 618d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 619d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 620d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 621d20bfe6fSHong Zhang } 622d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 623d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 624d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 62555a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 626d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 627d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 62855a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 629d20bfe6fSHong Zhang 630d20bfe6fSHong Zhang /* Allocate space for ca */ 631d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 632d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 633d20bfe6fSHong Zhang 634d20bfe6fSHong Zhang /* put together the new matrix */ 635d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 636d20bfe6fSHong Zhang 637d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 638d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 639d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 640d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 641d20bfe6fSHong Zhang c->nonew = 0; 642d20bfe6fSHong Zhang 643d20bfe6fSHong Zhang /* Clean up. */ 644d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 645eb9c0419SKris Buschelman 646eb9c0419SKris Buschelman PetscFunctionReturn(0); 647eb9c0419SKris Buschelman } 648eb9c0419SKris Buschelman 6493985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 6503985e5eaSKris Buschelman EXTERN_C_BEGIN 6513985e5eaSKris Buschelman #undef __FUNCT__ 6529af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 65355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 65455a3bba9SHong Zhang { 6555c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 656dfbe8321SBarry Smith PetscErrorCode ierr; 6573985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 6583985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 6593985e5eaSKris Buschelman Mat P=pp->AIJ; 6603985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 661b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 662b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 663b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 664b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 6653985e5eaSKris Buschelman MatScalar *ca; 6663985e5eaSKris Buschelman 6673985e5eaSKris Buschelman PetscFunctionBegin; 6683985e5eaSKris Buschelman /* Start timer */ 6699af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 6703985e5eaSKris Buschelman 6713985e5eaSKris Buschelman /* Get ij structure of P^T */ 6723985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 6733985e5eaSKris Buschelman 6743985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6753985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 676b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 6773985e5eaSKris Buschelman ci[0] = 0; 6783985e5eaSKris Buschelman 679b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 680b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 6813985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 6823985e5eaSKris Buschelman denserow = ptasparserow + an; 6833985e5eaSKris Buschelman sparserow = denserow + pn; 6843985e5eaSKris Buschelman 6853985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 6863985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 6873985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 6883985e5eaSKris Buschelman current_space = free_space; 6893985e5eaSKris Buschelman 6903985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 6913985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 6923985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 6933985e5eaSKris Buschelman ptanzi = 0; 6943985e5eaSKris Buschelman ptJ = ptj + pti[i]; 6953985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 6963985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 6973985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 6983985e5eaSKris Buschelman arow = ptJ[j] + dof; 6993985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 7003985e5eaSKris Buschelman ajj = aj + ai[arow]; 7013985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 7023985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 7033985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 7043985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7053985e5eaSKris Buschelman } 7063985e5eaSKris Buschelman } 7073985e5eaSKris Buschelman } 7083985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7093985e5eaSKris Buschelman ptaj = ptasparserow; 7103985e5eaSKris Buschelman cnzi = 0; 7113985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 712fe05a634SKris Buschelman pdof = *ptaj%dof; 7133985e5eaSKris Buschelman prow = (*ptaj++)/dof; 7143985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 7153985e5eaSKris Buschelman pjj = pj + pi[prow]; 7163985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 717fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 718fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 719fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 7203985e5eaSKris Buschelman } 7213985e5eaSKris Buschelman } 7223985e5eaSKris Buschelman } 7233985e5eaSKris Buschelman 7243985e5eaSKris Buschelman /* sort sparserow */ 7253985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 7263985e5eaSKris Buschelman 7273985e5eaSKris Buschelman /* If free space is not available, make more free space */ 7283985e5eaSKris Buschelman /* Double the amount of total space in the list */ 7293985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 7303985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 7313985e5eaSKris Buschelman } 7323985e5eaSKris Buschelman 7333985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 734b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 7353985e5eaSKris Buschelman current_space->array += cnzi; 7363985e5eaSKris Buschelman current_space->local_used += cnzi; 7373985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 7383985e5eaSKris Buschelman 7393985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 7403985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 7413985e5eaSKris Buschelman } 7423985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 7433985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 7443985e5eaSKris Buschelman } 7453985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7463985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 7473985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 7483985e5eaSKris Buschelman } 7493985e5eaSKris Buschelman } 7503985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7513985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7523985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 753b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 7543985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 7553985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 7563985e5eaSKris Buschelman 7573985e5eaSKris Buschelman /* Allocate space for ca */ 7583985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 7593985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 7603985e5eaSKris Buschelman 7613985e5eaSKris Buschelman /* put together the new matrix */ 7623985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 7633985e5eaSKris Buschelman 7643985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7653985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7663985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 7673985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 7683985e5eaSKris Buschelman c->nonew = 0; 7693985e5eaSKris Buschelman 7703985e5eaSKris Buschelman /* Clean up. */ 7713985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 7723985e5eaSKris Buschelman 7739af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 7743985e5eaSKris Buschelman PetscFunctionReturn(0); 7753985e5eaSKris Buschelman } 7763985e5eaSKris Buschelman EXTERN_C_END 7773985e5eaSKris Buschelman 7786849ba73SBarry Smith /* 7799af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 7804d3841fdSKris Buschelman 7814d3841fdSKris Buschelman Collective on Mat 7824d3841fdSKris Buschelman 7834d3841fdSKris Buschelman Input Parameters: 7844d3841fdSKris Buschelman + A - the matrix 7854d3841fdSKris Buschelman - P - the projection matrix 7864d3841fdSKris Buschelman 7874d3841fdSKris Buschelman Output Parameters: 7884d3841fdSKris Buschelman . C - the product matrix 7894d3841fdSKris Buschelman 7904d3841fdSKris Buschelman Notes: 7919af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 7924d3841fdSKris Buschelman the user using MatDeatroy(). 7934d3841fdSKris Buschelman 794170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 795170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 7964d3841fdSKris Buschelman 7974d3841fdSKris Buschelman Level: intermediate 7984d3841fdSKris Buschelman 7999af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 8006849ba73SBarry Smith */ 801e240928fSHong Zhang #undef __FUNCT__ 802e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 80355a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 80455a3bba9SHong Zhang { 805dfbe8321SBarry Smith PetscErrorCode ierr; 806534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 807534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 808eb9c0419SKris Buschelman 809eb9c0419SKris Buschelman PetscFunctionBegin; 810eb9c0419SKris Buschelman 8114482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 812c9780b6fSBarry Smith PetscValidType(A,1); 813eb9c0419SKris Buschelman MatPreallocated(A); 814eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 815eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 816eb9c0419SKris Buschelman 8174482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 818c9780b6fSBarry Smith PetscValidType(P,2); 819eb9c0419SKris Buschelman MatPreallocated(P); 820eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 821eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 822eb9c0419SKris Buschelman 8234482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 824c9780b6fSBarry Smith PetscValidType(C,3); 825eb9c0419SKris Buschelman MatPreallocated(C); 826eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 827eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 828eb9c0419SKris Buschelman 829eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 830eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 831eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 832eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 833eb9c0419SKris Buschelman 834534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 835534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 836534c1384SKris Buschelman fA = A->ops->ptapnumeric; 837534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 838534c1384SKris Buschelman fP = P->ops->ptapnumeric; 839534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 840534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 8414d3841fdSKris Buschelman 842534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 843534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 844534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 845eb9c0419SKris Buschelman 846eb9c0419SKris Buschelman PetscFunctionReturn(0); 847eb9c0419SKris Buschelman } 848eb9c0419SKris Buschelman 849eb9c0419SKris Buschelman #undef __FUNCT__ 8509af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 851dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 852dfbe8321SBarry Smith { 853dfbe8321SBarry Smith PetscErrorCode ierr; 854b1d57f15SBarry Smith PetscInt flops=0; 855d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 856d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 857d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 858b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 859b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 860b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 861b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 862d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 863eb9c0419SKris Buschelman 864eb9c0419SKris Buschelman PetscFunctionBegin; 865d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 866b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 867b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 868eb9c0419SKris Buschelman 869b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 870d20bfe6fSHong Zhang apjdense = apj + cn; 871d20bfe6fSHong Zhang 872d20bfe6fSHong Zhang /* Clear old values in C */ 873d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 874d20bfe6fSHong Zhang 875d20bfe6fSHong Zhang for (i=0;i<am;i++) { 876d20bfe6fSHong Zhang /* Form sparse row of A*P */ 877d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 878d20bfe6fSHong Zhang apnzj = 0; 879d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 880d20bfe6fSHong Zhang prow = *aj++; 881d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 882d20bfe6fSHong Zhang pjj = pj + pi[prow]; 883d20bfe6fSHong Zhang paj = pa + pi[prow]; 884d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 885d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 886d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 887d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 888d20bfe6fSHong Zhang } 889d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 890d20bfe6fSHong Zhang } 891d20bfe6fSHong Zhang flops += 2*pnzj; 892d20bfe6fSHong Zhang aa++; 893d20bfe6fSHong Zhang } 894d20bfe6fSHong Zhang 895d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 896d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 897d20bfe6fSHong Zhang 898d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 899d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 900d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 901d20bfe6fSHong Zhang nextap = 0; 902d20bfe6fSHong Zhang crow = *pJ++; 903d20bfe6fSHong Zhang cjj = cj + ci[crow]; 904d20bfe6fSHong Zhang caj = ca + ci[crow]; 905d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 906d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 907d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 908d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 909d20bfe6fSHong Zhang } 910d20bfe6fSHong Zhang } 911d20bfe6fSHong Zhang flops += 2*apnzj; 912d20bfe6fSHong Zhang pA++; 913d20bfe6fSHong Zhang } 914d20bfe6fSHong Zhang 915d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 916d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 917d20bfe6fSHong Zhang apa[apj[j]] = 0.; 918d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 919d20bfe6fSHong Zhang } 920d20bfe6fSHong Zhang } 921d20bfe6fSHong Zhang 922d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 923d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 924d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 925d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 926d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 927d20bfe6fSHong Zhang 928eb9c0419SKris Buschelman PetscFunctionReturn(0); 929eb9c0419SKris Buschelman } 9300e36024fSHong Zhang 931a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 932e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 933e240928fSHong Zhang #undef __FUNCT__ 934e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 935e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 936e240928fSHong Zhang { 937e240928fSHong Zhang PetscErrorCode ierr; 938e240928fSHong Zhang PetscInt flops=0; 939e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 940e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 941e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 942e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 943e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 944e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 945e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 946e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 947e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 948e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 949e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 950e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 951e240928fSHong Zhang 952e240928fSHong Zhang PetscFunctionBegin; 953e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 954a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 955e240928fSHong Zhang } 956a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 957e240928fSHong Zhang 958e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 959e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 960e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 961e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 962e240928fSHong Zhang apjdense = apj + cn; 963e240928fSHong Zhang 964e240928fSHong Zhang /* Clear old values in C */ 965e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 966e240928fSHong Zhang 967e240928fSHong Zhang for (i=0;i<am;i++) { 968e240928fSHong Zhang /* Form i-th sparse row of A*P */ 969e240928fSHong Zhang apnzj = 0; 970e240928fSHong Zhang /* diagonal portion of A */ 971e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 972e240928fSHong Zhang for (j=0;j<anzi;j++) { 973e240928fSHong Zhang prow = *adj; 974e240928fSHong Zhang adj++; 975e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 976e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 977e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 978e240928fSHong Zhang for (k=0;k<pnzj;k++) { 979e240928fSHong Zhang if (!apjdense[pjj[k]]) { 980e240928fSHong Zhang apjdense[pjj[k]] = -1; 981e240928fSHong Zhang apj[apnzj++] = pjj[k]; 982e240928fSHong Zhang } 983e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 984e240928fSHong Zhang } 985e240928fSHong Zhang flops += 2*pnzj; 986e240928fSHong Zhang ada++; 987e240928fSHong Zhang } 988e240928fSHong Zhang /* off-diagonal portion of A */ 989e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 990e240928fSHong Zhang for (j=0;j<anzi;j++) { 991e240928fSHong Zhang col = a->garray[*aoj]; 992e240928fSHong Zhang if (col < cstart){ 993e240928fSHong Zhang prow = *aoj; 994e240928fSHong Zhang } else if (col >= cend){ 995e240928fSHong Zhang prow = *aoj; 996e240928fSHong Zhang } else { 997e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 998e240928fSHong Zhang } 999e240928fSHong Zhang aoj++; 1000e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1001e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1002e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 1003e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1004e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1005e240928fSHong Zhang apjdense[pjj[k]] = -1; 1006e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1007e240928fSHong Zhang } 1008e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 1009e240928fSHong Zhang } 1010e240928fSHong Zhang flops += 2*pnzj; 1011e240928fSHong Zhang aoa++; 1012e240928fSHong Zhang } 1013e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 1014e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1015e240928fSHong Zhang 1016e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1017e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 1018e240928fSHong Zhang for (j=0;j<pnzi;j++) { 1019e240928fSHong Zhang nextap = 0; 1020e240928fSHong Zhang crow = *pJ++; 1021e240928fSHong Zhang cjj = cj + ci[crow]; 1022e240928fSHong Zhang caj = ca + ci[crow]; 1023e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 1024e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 1025e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 1026e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 1027e240928fSHong Zhang } 1028e240928fSHong Zhang } 1029e240928fSHong Zhang flops += 2*apnzj; 1030e240928fSHong Zhang pA++; 1031e240928fSHong Zhang } 1032e240928fSHong Zhang 1033e240928fSHong Zhang /* Zero the current row info for A*P */ 1034e240928fSHong Zhang for (j=0;j<apnzj;j++) { 1035e240928fSHong Zhang apa[apj[j]] = 0.; 1036e240928fSHong Zhang apjdense[apj[j]] = 0; 1037e240928fSHong Zhang } 1038e240928fSHong Zhang } 1039e240928fSHong Zhang 1040e240928fSHong Zhang /* Assemble the final matrix and clean up */ 1041e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1042e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1043e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 1044e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1045a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1046e240928fSHong Zhang PetscFunctionReturn(0); 1047e240928fSHong Zhang } 1048e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 1049e240928fSHong Zhang #undef __FUNCT__ 1050e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1051e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1052e240928fSHong Zhang { 1053e240928fSHong Zhang PetscErrorCode ierr; 1054e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1055e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1056e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1057e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1058e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1059e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 10606abd8857SHong Zhang PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1061e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1062e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 1063e240928fSHong Zhang MatScalar *ca; 1064e240928fSHong Zhang PetscBT lnkbt; 1065e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 1066e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1067e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1068e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1069e240928fSHong Zhang 1070e240928fSHong Zhang PetscFunctionBegin; 1071e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 1072e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1073e240928fSHong Zhang } 1074e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1075e240928fSHong Zhang 1076e240928fSHong Zhang prend = prstart + pm; 1077e240928fSHong Zhang 1078e240928fSHong Zhang /* get ij structure of P_loc^T */ 1079e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1080e240928fSHong Zhang ptJ=ptj; 1081e240928fSHong Zhang 1082e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 1083e240928fSHong Zhang /* free space for accumulating nonzero column info */ 1084e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1085e240928fSHong Zhang ci[0] = 0; 1086e240928fSHong Zhang 10876abd8857SHong Zhang ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 10886abd8857SHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 10896abd8857SHong Zhang ptasparserow_loc = ptadenserow_loc + aN; 10906abd8857SHong Zhang ptadenserow_oth = ptasparserow_loc + aN; 10916abd8857SHong Zhang ptasparserow_oth = ptadenserow_oth + aN; 1092e240928fSHong Zhang 1093e240928fSHong Zhang /* create and initialize a linked list */ 1094e240928fSHong Zhang nlnk = pN+1; 1095e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1096e240928fSHong Zhang 10976abd8857SHong Zhang /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1098e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1099e240928fSHong Zhang nnz = adi[am] + aoi[am]; 11006abd8857SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1101e240928fSHong Zhang current_space = free_space; 1102e240928fSHong Zhang 1103e240928fSHong Zhang /* determine symbolic info for each row of C: */ 1104e240928fSHong Zhang for (i=0;i<pN;i++) { 1105e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 1106e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 1107e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 1108e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 1109e240928fSHong Zhang arow = *ptJ++; 1110e240928fSHong Zhang /* diagonal portion of A */ 1111e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 1112e240928fSHong Zhang ajj = adj + adi[arow]; 1113e240928fSHong Zhang for (k=0;k<anzj;k++) { 1114e240928fSHong Zhang col = ajj[k]+prstart; 1115e240928fSHong Zhang if (!ptadenserow_loc[col]) { 1116e240928fSHong Zhang ptadenserow_loc[col] = -1; 1117e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 1118e240928fSHong Zhang } 1119e240928fSHong Zhang } 1120e240928fSHong Zhang /* off-diagonal portion of A */ 1121e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 1122e240928fSHong Zhang ajj = aoj + aoi[arow]; 1123e240928fSHong Zhang for (k=0;k<anzj;k++) { 1124e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 1125e240928fSHong Zhang if (col < cstart){ 1126e240928fSHong Zhang col = ajj[k]; 1127e240928fSHong Zhang } else if (col >= cend){ 1128e240928fSHong Zhang col = ajj[k] + pm; 1129e240928fSHong Zhang } else { 1130e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1131e240928fSHong Zhang } 1132e240928fSHong Zhang if (!ptadenserow_oth[col]) { 1133e240928fSHong Zhang ptadenserow_oth[col] = -1; 1134e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 1135e240928fSHong Zhang } 1136e240928fSHong Zhang } 1137e240928fSHong Zhang } 1138e240928fSHong Zhang 1139e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1140e240928fSHong Zhang cnzi = 0; 1141e240928fSHong Zhang /* rows of P_loc */ 1142e240928fSHong Zhang ptaj = ptasparserow_loc; 1143e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 1144e240928fSHong Zhang prow = *ptaj++; 1145e240928fSHong Zhang prow -= prstart; /* rm */ 1146e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1147e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1148e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1149e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1150e240928fSHong Zhang cnzi += nlnk; 1151e240928fSHong Zhang } 1152e240928fSHong Zhang /* rows of P_oth */ 1153e240928fSHong Zhang ptaj = ptasparserow_oth; 1154e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 1155e240928fSHong Zhang prow = *ptaj++; 1156e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 1157e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1158e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1159e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1160e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1161e240928fSHong Zhang cnzi += nlnk; 1162e240928fSHong Zhang } 1163e240928fSHong Zhang 1164e240928fSHong Zhang /* If free space is not available, make more free space */ 1165e240928fSHong Zhang /* Double the amount of total space in the list */ 1166e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 1167e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1168e240928fSHong Zhang } 1169e240928fSHong Zhang 1170e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 1171e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1172e240928fSHong Zhang current_space->array += cnzi; 1173e240928fSHong Zhang current_space->local_used += cnzi; 1174e240928fSHong Zhang current_space->local_remaining -= cnzi; 1175e240928fSHong Zhang 1176e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 1177e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 1178e240928fSHong Zhang } 1179e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 1180e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 1181e240928fSHong Zhang } 1182e240928fSHong Zhang 1183e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1184e240928fSHong Zhang /* For now, we will recompute what is needed. */ 1185e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 1186e240928fSHong Zhang } 1187e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1188e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 1189e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 1190e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1191e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1192e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1193e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1194e240928fSHong Zhang 1195e240928fSHong Zhang /* Allocate space for ca */ 1196e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1197e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1198e240928fSHong Zhang 1199e240928fSHong Zhang /* put together the new matrix */ 1200e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1201e240928fSHong Zhang 1202e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1203e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1204e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 1205e240928fSHong Zhang c->freedata = PETSC_TRUE; 1206e240928fSHong Zhang c->nonew = 0; 1207e240928fSHong Zhang 1208e240928fSHong Zhang /* Clean up. */ 1209e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1210a61c8c0fSHong Zhang 1211e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1212e240928fSHong Zhang PetscFunctionReturn(0); 1213e240928fSHong Zhang } 1214