1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78a61c8c0fSHong Zhang #undef __FUNCT__ 79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81a61c8c0fSHong Zhang { 82a61c8c0fSHong Zhang PetscErrorCode ierr; 8332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 84a61c8c0fSHong Zhang PetscObjectContainer container; 85a61c8c0fSHong Zhang 86a61c8c0fSHong Zhang PetscFunctionBegin; 87a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88a61c8c0fSHong Zhang if (container) { 89a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 9032fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 9132fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 9201b7ae99SHong Zhang ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 9301b7ae99SHong Zhang ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94a61c8c0fSHong Zhang 95a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 9632fba14fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97a61c8c0fSHong Zhang } 98a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 99a61c8c0fSHong Zhang 100a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101a61c8c0fSHong Zhang PetscFunctionReturn(0); 102a61c8c0fSHong Zhang } 103a61c8c0fSHong Zhang 104eb9c0419SKris Buschelman #undef __FUNCT__ 105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107ff134f7aSHong Zhang { 108ff134f7aSHong Zhang PetscErrorCode ierr; 10932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 110a61c8c0fSHong Zhang PetscObjectContainer container; 111b90dcfe3SHong Zhang 112b90dcfe3SHong Zhang PetscFunctionBegin; 113b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1144c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 11532fba14fSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 11632fba14fSHong Zhang 117e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 11801b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119a61c8c0fSHong Zhang 120e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 12132fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122e240928fSHong Zhang 123a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 124a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128e240928fSHong Zhang 12932fba14fSHong Zhang /* now, compute symbolic local P^T*A*P */ 130a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 133a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134a61c8c0fSHong Zhang if (container) { 135a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136a61c8c0fSHong Zhang } else { 137a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138a61c8c0fSHong Zhang } 139a61c8c0fSHong Zhang 140a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 14101b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142a61c8c0fSHong Zhang 143a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 14432fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 14532fba14fSHong Zhang 146a61c8c0fSHong Zhang } else { 147a61c8c0fSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148a61c8c0fSHong Zhang } 14932fba14fSHong Zhang /* now, compute numeric local P^T*A*P */ 150a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153a61c8c0fSHong Zhang 154a61c8c0fSHong Zhang PetscFunctionReturn(0); 155a61c8c0fSHong Zhang } 156a61c8c0fSHong Zhang 157a61c8c0fSHong Zhang #undef __FUNCT__ 158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160a61c8c0fSHong Zhang { 161a61c8c0fSHong Zhang PetscErrorCode ierr; 162a61c8c0fSHong Zhang Mat C_seq; 16332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 164a61c8c0fSHong Zhang PetscObjectContainer container; 165a61c8c0fSHong Zhang 166a61c8c0fSHong Zhang PetscFunctionBegin; 167a61c8c0fSHong Zhang 168a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169a61c8c0fSHong Zhang if (container) { 170a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171a61c8c0fSHong Zhang } else { 172a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173a61c8c0fSHong Zhang } 17425616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 17532fba14fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176a61c8c0fSHong Zhang 177b90dcfe3SHong Zhang /* add C_seq into mpi C */ 178a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179b90dcfe3SHong Zhang 180ff134f7aSHong Zhang PetscFunctionReturn(0); 181ff134f7aSHong Zhang } 182ff134f7aSHong Zhang 183ff134f7aSHong Zhang #undef __FUNCT__ 184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186ff134f7aSHong Zhang { 187b90dcfe3SHong Zhang PetscErrorCode ierr; 188671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 18932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 190a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 191ff134f7aSHong Zhang 192*097ab81bSHong Zhang PetscInt flops=0; 193*097ab81bSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 194*097ab81bSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 195*097ab81bSHong Zhang Mat C_seq; 196*097ab81bSHong Zhang Mat_SeqAIJ *c,*p_loc,*p_oth; 197*097ab81bSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 198*097ab81bSHong Zhang PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj; 199*097ab81bSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 200*097ab81bSHong Zhang PetscInt *cjj; 201*097ab81bSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj; 202*097ab81bSHong Zhang MatScalar *pa_loc,*pA,*pa_oth; 203*097ab81bSHong Zhang PetscInt am=A->m,cN=C->N; 204*097ab81bSHong Zhang 205*097ab81bSHong Zhang MPI_Comm comm=C->comm; 206*097ab81bSHong Zhang PetscMPIInt size,rank,taga,*len_s; 207*097ab81bSHong Zhang PetscInt *owners; 208*097ab81bSHong Zhang PetscInt proc; 209*097ab81bSHong Zhang PetscInt **buf_ri,**buf_rj; 210*097ab81bSHong Zhang PetscInt cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */ 211*097ab81bSHong Zhang PetscInt nrows,**buf_ri_k,**nextrow,**nextcseqi; 212*097ab81bSHong Zhang MPI_Request *s_waits,*r_waits; 213*097ab81bSHong Zhang MPI_Status *status; 214*097ab81bSHong Zhang MatScalar **abuf_r,*ba_i; 215*097ab81bSHong Zhang Mat_SeqAIJ *cseq; 216*097ab81bSHong Zhang PetscInt *cseqi,*cseqj; 217*097ab81bSHong Zhang 218ff134f7aSHong Zhang PetscFunctionBegin; 219a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 220a61c8c0fSHong Zhang if (cont_merge) { 221a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 2227f79fd58SMatthew Knepley } else { 223a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 224671beff6SHong Zhang } 225a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 226a61c8c0fSHong Zhang if (cont_ptap) { 227a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 228a61c8c0fSHong Zhang } else { 229a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 230a61c8c0fSHong Zhang } 231*097ab81bSHong Zhang #ifdef OLD 23232fba14fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 233*097ab81bSHong Zhang #endif /* OLD */ 234*097ab81bSHong Zhang /*--------------------------------------------------------------*/ 235*097ab81bSHong Zhang 236*097ab81bSHong Zhang p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data; 237*097ab81bSHong Zhang p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data; 238*097ab81bSHong Zhang pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc; 239*097ab81bSHong Zhang pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a; 240*097ab81bSHong Zhang 241*097ab81bSHong Zhang C_seq=merge->C_seq; 242*097ab81bSHong Zhang cseq=(Mat_SeqAIJ*)C_seq->data; 243*097ab81bSHong Zhang cseqi=cseq->i; cseqj=cseq->j; 244*097ab81bSHong Zhang cseqa=cseq->a; 245*097ab81bSHong Zhang 246*097ab81bSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 247*097ab81bSHong Zhang ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 248*097ab81bSHong Zhang ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 249*097ab81bSHong Zhang apj = (PetscInt *)(apa + cN); 250*097ab81bSHong Zhang apjdense = apj + cN; 251*097ab81bSHong Zhang 252*097ab81bSHong Zhang /* Clear old values in C_Seq */ 253*097ab81bSHong Zhang ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr); 254*097ab81bSHong Zhang 255*097ab81bSHong Zhang for (i=0;i<am;i++) { 256*097ab81bSHong Zhang /* Form i-th sparse row of A*P */ 257*097ab81bSHong Zhang apnzj = 0; 258*097ab81bSHong Zhang /* diagonal portion of A */ 259*097ab81bSHong Zhang anzi = adi[i+1] - adi[i]; 260*097ab81bSHong Zhang for (j=0;j<anzi;j++) { 261*097ab81bSHong Zhang prow = *adj; 262*097ab81bSHong Zhang adj++; 263*097ab81bSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 264*097ab81bSHong Zhang pjj = pj_loc + pi_loc[prow]; 265*097ab81bSHong Zhang paj = pa_loc + pi_loc[prow]; 266*097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 267*097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 268*097ab81bSHong Zhang apjdense[pjj[k]] = -1; 269*097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 270*097ab81bSHong Zhang } 271*097ab81bSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 272*097ab81bSHong Zhang } 273*097ab81bSHong Zhang flops += 2*pnzj; 274*097ab81bSHong Zhang ada++; 275*097ab81bSHong Zhang } 276*097ab81bSHong Zhang /* off-diagonal portion of A */ 277*097ab81bSHong Zhang anzi = aoi[i+1] - aoi[i]; 278*097ab81bSHong Zhang for (j=0;j<anzi;j++) { 279*097ab81bSHong Zhang col = a->garray[*aoj]; 280*097ab81bSHong Zhang if (col < cstart){ 281*097ab81bSHong Zhang prow = *aoj; 282*097ab81bSHong Zhang } else if (col >= cend){ 283*097ab81bSHong Zhang prow = *aoj; 284*097ab81bSHong Zhang } else { 285*097ab81bSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 286*097ab81bSHong Zhang } 287*097ab81bSHong Zhang aoj++; 288*097ab81bSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 289*097ab81bSHong Zhang pjj = pj_oth + pi_oth[prow]; 290*097ab81bSHong Zhang paj = pa_oth + pi_oth[prow]; 291*097ab81bSHong Zhang for (k=0;k<pnzj;k++) { 292*097ab81bSHong Zhang if (!apjdense[pjj[k]]) { 293*097ab81bSHong Zhang apjdense[pjj[k]] = -1; 294*097ab81bSHong Zhang apj[apnzj++] = pjj[k]; 295*097ab81bSHong Zhang } 296*097ab81bSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 297*097ab81bSHong Zhang } 298*097ab81bSHong Zhang flops += 2*pnzj; 299*097ab81bSHong Zhang aoa++; 300*097ab81bSHong Zhang } 301*097ab81bSHong Zhang /* Sort the j index array for quick sparse axpy. */ 302*097ab81bSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 303*097ab81bSHong Zhang 304*097ab81bSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 305*097ab81bSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 306*097ab81bSHong Zhang for (j=0;j<pnzi;j++) { 307*097ab81bSHong Zhang nextap = 0; 308*097ab81bSHong Zhang crow = *pJ++; 309*097ab81bSHong Zhang cjj = cseqj + cseqi[crow]; 310*097ab81bSHong Zhang caj = cseqa + cseqi[crow]; 311*097ab81bSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 312*097ab81bSHong Zhang for (k=0;nextap<apnzj;k++) { 313*097ab81bSHong Zhang if (cjj[k]==apj[nextap]) { 314*097ab81bSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 315*097ab81bSHong Zhang } 316*097ab81bSHong Zhang } 317*097ab81bSHong Zhang flops += 2*apnzj; 318*097ab81bSHong Zhang pA++; 319*097ab81bSHong Zhang } 320*097ab81bSHong Zhang 321*097ab81bSHong Zhang /* Zero the current row info for A*P */ 322*097ab81bSHong Zhang for (j=0;j<apnzj;j++) { 323*097ab81bSHong Zhang apa[apj[j]] = 0.; 324*097ab81bSHong Zhang apjdense[apj[j]] = 0; 325*097ab81bSHong Zhang } 326*097ab81bSHong Zhang } 327*097ab81bSHong Zhang 328*097ab81bSHong Zhang /* Assemble the final matrix and clean up */ 329*097ab81bSHong Zhang ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330*097ab81bSHong Zhang ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331*097ab81bSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 332*097ab81bSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 333*097ab81bSHong Zhang 334*097ab81bSHong Zhang #ifdef OLD1 335a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 336*097ab81bSHong Zhang #endif /* OLD1 */ 337*097ab81bSHong Zhang /*--------------------------------------------------------------*/ 338*097ab81bSHong Zhang 339*097ab81bSHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 340*097ab81bSHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 341*097ab81bSHong Zhang 342*097ab81bSHong Zhang bi = merge->bi; 343*097ab81bSHong Zhang bj = merge->bj; 344*097ab81bSHong Zhang buf_ri = merge->buf_ri; 345*097ab81bSHong Zhang buf_rj = merge->buf_rj; 346*097ab81bSHong Zhang 347*097ab81bSHong Zhang ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 348*097ab81bSHong Zhang ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 349*097ab81bSHong Zhang 350*097ab81bSHong Zhang /* send and recv matrix values */ 351*097ab81bSHong Zhang /*-----------------------------*/ 352*097ab81bSHong Zhang len_s = merge->len_s; 353*097ab81bSHong Zhang ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 354*097ab81bSHong Zhang ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 355*097ab81bSHong Zhang 356*097ab81bSHong Zhang ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 357*097ab81bSHong Zhang for (proc=0,k=0; proc<size; proc++){ 358*097ab81bSHong Zhang if (!len_s[proc]) continue; 359*097ab81bSHong Zhang i = owners[proc]; 360*097ab81bSHong Zhang ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 361*097ab81bSHong Zhang k++; 362*097ab81bSHong Zhang } 363*097ab81bSHong Zhang 364*097ab81bSHong Zhang ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 365*097ab81bSHong Zhang ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 366*097ab81bSHong Zhang ierr = PetscFree(status);CHKERRQ(ierr); 367*097ab81bSHong Zhang 368*097ab81bSHong Zhang ierr = PetscFree(s_waits);CHKERRQ(ierr); 369*097ab81bSHong Zhang ierr = PetscFree(r_waits);CHKERRQ(ierr); 370*097ab81bSHong Zhang 371*097ab81bSHong Zhang /* insert mat values of mpimat */ 372*097ab81bSHong Zhang /*----------------------------*/ 373*097ab81bSHong Zhang ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 374*097ab81bSHong Zhang ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 375*097ab81bSHong Zhang nextrow = buf_ri_k + merge->nrecv; 376*097ab81bSHong Zhang nextcseqi = nextrow + merge->nrecv; 377*097ab81bSHong Zhang 378*097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ 379*097ab81bSHong Zhang buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 380*097ab81bSHong Zhang nrows = *(buf_ri_k[k]); 381*097ab81bSHong Zhang nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 382*097ab81bSHong Zhang nextcseqi[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 383*097ab81bSHong Zhang } 384*097ab81bSHong Zhang 385*097ab81bSHong Zhang /* set values of ba */ 386*097ab81bSHong Zhang for (i=0; i<C->m; i++) { 387*097ab81bSHong Zhang cseqrow = owners[rank] + i; 388*097ab81bSHong Zhang bj_i = bj+bi[i]; /* col indices of the i-th row of C */ 389*097ab81bSHong Zhang bnzi = bi[i+1] - bi[i]; 390*097ab81bSHong Zhang ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 391*097ab81bSHong Zhang 392*097ab81bSHong Zhang /* add local non-zero vals of this proc's C_seq into ba */ 393*097ab81bSHong Zhang cseqnzi = cseqi[cseqrow+1] - cseqi[cseqrow]; 394*097ab81bSHong Zhang cseqj = cseq->j + cseqi[cseqrow]; 395*097ab81bSHong Zhang cseqa = cseq->a + cseqi[cseqrow]; 396*097ab81bSHong Zhang nextcseqj = 0; 397*097ab81bSHong Zhang for (j=0; nextcseqj<cseqnzi; j++){ 398*097ab81bSHong Zhang if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 399*097ab81bSHong Zhang ba_i[j] += cseqa[nextcseqj++]; 400*097ab81bSHong Zhang } 401*097ab81bSHong Zhang } 402*097ab81bSHong Zhang 403*097ab81bSHong Zhang /* add received vals into ba */ 404*097ab81bSHong Zhang for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 405*097ab81bSHong Zhang /* i-th row */ 406*097ab81bSHong Zhang if (i == *nextrow[k]) { 407*097ab81bSHong Zhang cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k]; 408*097ab81bSHong Zhang cseqj = buf_rj[k] + *(nextcseqi[k]); 409*097ab81bSHong Zhang cseqa = abuf_r[k] + *(nextcseqi[k]); 410*097ab81bSHong Zhang nextcseqj = 0; 411*097ab81bSHong Zhang for (j=0; nextcseqj<cseqnzi; j++){ 412*097ab81bSHong Zhang if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */ 413*097ab81bSHong Zhang ba_i[j] += cseqa[nextcseqj++]; 414*097ab81bSHong Zhang } 415*097ab81bSHong Zhang } 416*097ab81bSHong Zhang nextrow[k]++; nextcseqi[k]++; 417*097ab81bSHong Zhang } 418*097ab81bSHong Zhang } 419*097ab81bSHong Zhang ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 420*097ab81bSHong Zhang } 421*097ab81bSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 422*097ab81bSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 423*097ab81bSHong Zhang 424*097ab81bSHong Zhang ierr = PetscFree(abuf_r);CHKERRQ(ierr); 425*097ab81bSHong Zhang ierr = PetscFree(ba_i);CHKERRQ(ierr); 426*097ab81bSHong Zhang ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 427*097ab81bSHong Zhang 428ff134f7aSHong Zhang PetscFunctionReturn(0); 429ff134f7aSHong Zhang } 430ff134f7aSHong Zhang 431ff134f7aSHong Zhang #undef __FUNCT__ 4329af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 433dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 4349af31e4aSHong Zhang { 435dfbe8321SBarry Smith PetscErrorCode ierr; 436b1d57f15SBarry Smith 4379af31e4aSHong Zhang PetscFunctionBegin; 4389af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 439d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4409af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 441d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 4429af31e4aSHong Zhang } 443d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4449af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 445d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 4469af31e4aSHong Zhang PetscFunctionReturn(0); 4479af31e4aSHong Zhang } 4489af31e4aSHong Zhang 4496849ba73SBarry Smith /* 4509af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 4514d3841fdSKris Buschelman 4524d3841fdSKris Buschelman Collective on Mat 4534d3841fdSKris Buschelman 4544d3841fdSKris Buschelman Input Parameters: 4554d3841fdSKris Buschelman + A - the matrix 4564d3841fdSKris Buschelman - P - the projection matrix 4574d3841fdSKris Buschelman 4584d3841fdSKris Buschelman Output Parameters: 4594d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 4604d3841fdSKris Buschelman 4614d3841fdSKris Buschelman Notes: 4624d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 4634d3841fdSKris Buschelman 4644d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 4654d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 4669af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 4674d3841fdSKris Buschelman 4684d3841fdSKris Buschelman Level: intermediate 4694d3841fdSKris Buschelman 4709af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 4716849ba73SBarry Smith */ 472e240928fSHong Zhang #undef __FUNCT__ 473e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 47455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 47555a3bba9SHong Zhang { 476dfbe8321SBarry Smith PetscErrorCode ierr; 477534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 478534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 479eb9c0419SKris Buschelman 480eb9c0419SKris Buschelman PetscFunctionBegin; 481eb9c0419SKris Buschelman 4824482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 483c9780b6fSBarry Smith PetscValidType(A,1); 484eb9c0419SKris Buschelman MatPreallocated(A); 485eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 486eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 487eb9c0419SKris Buschelman 4884482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 489c9780b6fSBarry Smith PetscValidType(P,2); 490eb9c0419SKris Buschelman MatPreallocated(P); 491eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 492eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 493eb9c0419SKris Buschelman 4944482741eSBarry Smith PetscValidPointer(C,3); 4954482741eSBarry Smith 496eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 497eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 498eb9c0419SKris Buschelman 499534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 500534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 501534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 502534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 503534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 504534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 505534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5064d3841fdSKris Buschelman 507534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 508534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 509534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 510eb9c0419SKris Buschelman 511eb9c0419SKris Buschelman PetscFunctionReturn(0); 512eb9c0419SKris Buschelman } 513eb9c0419SKris Buschelman 514f747e1aeSHong Zhang typedef struct { 515f747e1aeSHong Zhang Mat symAP; 516f747e1aeSHong Zhang } Mat_PtAPstruct; 517f747e1aeSHong Zhang 51878a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 51978a80504SBarry Smith 520f747e1aeSHong Zhang #undef __FUNCT__ 521f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 522f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 523f747e1aeSHong Zhang { 524f4a850bbSBarry Smith PetscErrorCode ierr; 525f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 526f747e1aeSHong Zhang 527f747e1aeSHong Zhang PetscFunctionBegin; 528f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 529f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 53078a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 531f747e1aeSHong Zhang PetscFunctionReturn(0); 532f747e1aeSHong Zhang } 533f747e1aeSHong Zhang 534eb9c0419SKris Buschelman #undef __FUNCT__ 5359af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 53655a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 53755a3bba9SHong Zhang { 538dfbe8321SBarry Smith PetscErrorCode ierr; 539d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 540d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 54155a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 542b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 54355a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 544b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 545d20bfe6fSHong Zhang MatScalar *ca; 54655a3bba9SHong Zhang PetscBT lnkbt; 547eb9c0419SKris Buschelman 548eb9c0419SKris Buschelman PetscFunctionBegin; 549d20bfe6fSHong Zhang /* Get ij structure of P^T */ 550eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 551d20bfe6fSHong Zhang ptJ=ptj; 552eb9c0419SKris Buschelman 553d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 554d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 55555a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 556d20bfe6fSHong Zhang ci[0] = 0; 557eb9c0419SKris Buschelman 55855a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 55955a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 560d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 56155a3bba9SHong Zhang 56255a3bba9SHong Zhang /* create and initialize a linked list */ 56355a3bba9SHong Zhang nlnk = pn+1; 56455a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 565eb9c0419SKris Buschelman 566d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 567d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 568d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 569d20bfe6fSHong Zhang current_space = free_space; 570d20bfe6fSHong Zhang 571d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 572d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 573d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 574d20bfe6fSHong Zhang ptanzi = 0; 575d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 576d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 577d20bfe6fSHong Zhang arow = *ptJ++; 578d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 579d20bfe6fSHong Zhang ajj = aj + ai[arow]; 580d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 581d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 582d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 583d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 584d20bfe6fSHong Zhang } 585d20bfe6fSHong Zhang } 586d20bfe6fSHong Zhang } 587d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 588d20bfe6fSHong Zhang ptaj = ptasparserow; 589d20bfe6fSHong Zhang cnzi = 0; 590d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 591d20bfe6fSHong Zhang prow = *ptaj++; 592d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 593d20bfe6fSHong Zhang pjj = pj + pi[prow]; 59455a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 59555a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 59655a3bba9SHong Zhang cnzi += nlnk; 597d20bfe6fSHong Zhang } 598d20bfe6fSHong Zhang 599d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 600d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 601d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 602d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 603d20bfe6fSHong Zhang } 604d20bfe6fSHong Zhang 605d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 60655a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 607d20bfe6fSHong Zhang current_space->array += cnzi; 608d20bfe6fSHong Zhang current_space->local_used += cnzi; 609d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 610d20bfe6fSHong Zhang 611d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 612d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 613d20bfe6fSHong Zhang } 614d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 615d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 616d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 617d20bfe6fSHong Zhang } 618d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 619d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 620d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 62155a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 622d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 623d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 62455a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 625d20bfe6fSHong Zhang 626d20bfe6fSHong Zhang /* Allocate space for ca */ 627d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 628d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 629d20bfe6fSHong Zhang 630d20bfe6fSHong Zhang /* put together the new matrix */ 631d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 632d20bfe6fSHong Zhang 633d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 634d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 635d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 636d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 637d20bfe6fSHong Zhang c->nonew = 0; 638d20bfe6fSHong Zhang 639d20bfe6fSHong Zhang /* Clean up. */ 640d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 641eb9c0419SKris Buschelman 642eb9c0419SKris Buschelman PetscFunctionReturn(0); 643eb9c0419SKris Buschelman } 644eb9c0419SKris Buschelman 6453985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 6463985e5eaSKris Buschelman EXTERN_C_BEGIN 6473985e5eaSKris Buschelman #undef __FUNCT__ 6489af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 64955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 65055a3bba9SHong Zhang { 6515c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 652dfbe8321SBarry Smith PetscErrorCode ierr; 6533985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 6543985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 6553985e5eaSKris Buschelman Mat P=pp->AIJ; 6563985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 657b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 658b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 659b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 660b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 6613985e5eaSKris Buschelman MatScalar *ca; 6623985e5eaSKris Buschelman 6633985e5eaSKris Buschelman PetscFunctionBegin; 6643985e5eaSKris Buschelman /* Start timer */ 6659af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 6663985e5eaSKris Buschelman 6673985e5eaSKris Buschelman /* Get ij structure of P^T */ 6683985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 6693985e5eaSKris Buschelman 6703985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 6713985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 672b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 6733985e5eaSKris Buschelman ci[0] = 0; 6743985e5eaSKris Buschelman 675b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 676b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 6773985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 6783985e5eaSKris Buschelman denserow = ptasparserow + an; 6793985e5eaSKris Buschelman sparserow = denserow + pn; 6803985e5eaSKris Buschelman 6813985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 6823985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 6833985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 6843985e5eaSKris Buschelman current_space = free_space; 6853985e5eaSKris Buschelman 6863985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 6873985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 6883985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 6893985e5eaSKris Buschelman ptanzi = 0; 6903985e5eaSKris Buschelman ptJ = ptj + pti[i]; 6913985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 6923985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 6933985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 6943985e5eaSKris Buschelman arow = ptJ[j] + dof; 6953985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 6963985e5eaSKris Buschelman ajj = aj + ai[arow]; 6973985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 6983985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 6993985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 7003985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 7013985e5eaSKris Buschelman } 7023985e5eaSKris Buschelman } 7033985e5eaSKris Buschelman } 7043985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7053985e5eaSKris Buschelman ptaj = ptasparserow; 7063985e5eaSKris Buschelman cnzi = 0; 7073985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 708fe05a634SKris Buschelman pdof = *ptaj%dof; 7093985e5eaSKris Buschelman prow = (*ptaj++)/dof; 7103985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 7113985e5eaSKris Buschelman pjj = pj + pi[prow]; 7123985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 713fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 714fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 715fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 7163985e5eaSKris Buschelman } 7173985e5eaSKris Buschelman } 7183985e5eaSKris Buschelman } 7193985e5eaSKris Buschelman 7203985e5eaSKris Buschelman /* sort sparserow */ 7213985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 7223985e5eaSKris Buschelman 7233985e5eaSKris Buschelman /* If free space is not available, make more free space */ 7243985e5eaSKris Buschelman /* Double the amount of total space in the list */ 7253985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 7263985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 7273985e5eaSKris Buschelman } 7283985e5eaSKris Buschelman 7293985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 730b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 7313985e5eaSKris Buschelman current_space->array += cnzi; 7323985e5eaSKris Buschelman current_space->local_used += cnzi; 7333985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 7343985e5eaSKris Buschelman 7353985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 7363985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 7373985e5eaSKris Buschelman } 7383985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 7393985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 7403985e5eaSKris Buschelman } 7413985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 7423985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 7433985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 7443985e5eaSKris Buschelman } 7453985e5eaSKris Buschelman } 7463985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 7473985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 7483985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 749b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 7503985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 7513985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 7523985e5eaSKris Buschelman 7533985e5eaSKris Buschelman /* Allocate space for ca */ 7543985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 7553985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 7563985e5eaSKris Buschelman 7573985e5eaSKris Buschelman /* put together the new matrix */ 7583985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 7593985e5eaSKris Buschelman 7603985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 7613985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 7623985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 7633985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 7643985e5eaSKris Buschelman c->nonew = 0; 7653985e5eaSKris Buschelman 7663985e5eaSKris Buschelman /* Clean up. */ 7673985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 7683985e5eaSKris Buschelman 7699af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 7703985e5eaSKris Buschelman PetscFunctionReturn(0); 7713985e5eaSKris Buschelman } 7723985e5eaSKris Buschelman EXTERN_C_END 7733985e5eaSKris Buschelman 7746849ba73SBarry Smith /* 7759af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 7764d3841fdSKris Buschelman 7774d3841fdSKris Buschelman Collective on Mat 7784d3841fdSKris Buschelman 7794d3841fdSKris Buschelman Input Parameters: 7804d3841fdSKris Buschelman + A - the matrix 7814d3841fdSKris Buschelman - P - the projection matrix 7824d3841fdSKris Buschelman 7834d3841fdSKris Buschelman Output Parameters: 7844d3841fdSKris Buschelman . C - the product matrix 7854d3841fdSKris Buschelman 7864d3841fdSKris Buschelman Notes: 7879af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 7884d3841fdSKris Buschelman the user using MatDeatroy(). 7894d3841fdSKris Buschelman 790170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 791170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 7924d3841fdSKris Buschelman 7934d3841fdSKris Buschelman Level: intermediate 7944d3841fdSKris Buschelman 7959af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 7966849ba73SBarry Smith */ 797e240928fSHong Zhang #undef __FUNCT__ 798e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 79955a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 80055a3bba9SHong Zhang { 801dfbe8321SBarry Smith PetscErrorCode ierr; 802534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 803534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 804eb9c0419SKris Buschelman 805eb9c0419SKris Buschelman PetscFunctionBegin; 806eb9c0419SKris Buschelman 8074482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 808c9780b6fSBarry Smith PetscValidType(A,1); 809eb9c0419SKris Buschelman MatPreallocated(A); 810eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 811eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 812eb9c0419SKris Buschelman 8134482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 814c9780b6fSBarry Smith PetscValidType(P,2); 815eb9c0419SKris Buschelman MatPreallocated(P); 816eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 817eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 818eb9c0419SKris Buschelman 8194482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 820c9780b6fSBarry Smith PetscValidType(C,3); 821eb9c0419SKris Buschelman MatPreallocated(C); 822eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 823eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 824eb9c0419SKris Buschelman 825eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 826eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 827eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 828eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 829eb9c0419SKris Buschelman 830534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 831534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 832534c1384SKris Buschelman fA = A->ops->ptapnumeric; 833534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 834534c1384SKris Buschelman fP = P->ops->ptapnumeric; 835534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 836534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 8374d3841fdSKris Buschelman 838534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 839534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 840534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 841eb9c0419SKris Buschelman 842eb9c0419SKris Buschelman PetscFunctionReturn(0); 843eb9c0419SKris Buschelman } 844eb9c0419SKris Buschelman 845eb9c0419SKris Buschelman #undef __FUNCT__ 8469af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 847dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 848dfbe8321SBarry Smith { 849dfbe8321SBarry Smith PetscErrorCode ierr; 850b1d57f15SBarry Smith PetscInt flops=0; 851d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 852d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 853d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 854b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 855b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 856b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 857b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 858d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 859eb9c0419SKris Buschelman 860eb9c0419SKris Buschelman PetscFunctionBegin; 861d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 862b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 863b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 864eb9c0419SKris Buschelman 865b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 866d20bfe6fSHong Zhang apjdense = apj + cn; 867d20bfe6fSHong Zhang 868d20bfe6fSHong Zhang /* Clear old values in C */ 869d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 870d20bfe6fSHong Zhang 871d20bfe6fSHong Zhang for (i=0;i<am;i++) { 872d20bfe6fSHong Zhang /* Form sparse row of A*P */ 873d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 874d20bfe6fSHong Zhang apnzj = 0; 875d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 876d20bfe6fSHong Zhang prow = *aj++; 877d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 878d20bfe6fSHong Zhang pjj = pj + pi[prow]; 879d20bfe6fSHong Zhang paj = pa + pi[prow]; 880d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 881d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 882d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 883d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 884d20bfe6fSHong Zhang } 885d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 886d20bfe6fSHong Zhang } 887d20bfe6fSHong Zhang flops += 2*pnzj; 888d20bfe6fSHong Zhang aa++; 889d20bfe6fSHong Zhang } 890d20bfe6fSHong Zhang 891d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 892d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 893d20bfe6fSHong Zhang 894d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 895d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 896d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 897d20bfe6fSHong Zhang nextap = 0; 898d20bfe6fSHong Zhang crow = *pJ++; 899d20bfe6fSHong Zhang cjj = cj + ci[crow]; 900d20bfe6fSHong Zhang caj = ca + ci[crow]; 901d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 902d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 903d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 904d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 905d20bfe6fSHong Zhang } 906d20bfe6fSHong Zhang } 907d20bfe6fSHong Zhang flops += 2*apnzj; 908d20bfe6fSHong Zhang pA++; 909d20bfe6fSHong Zhang } 910d20bfe6fSHong Zhang 911d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 912d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 913d20bfe6fSHong Zhang apa[apj[j]] = 0.; 914d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 915d20bfe6fSHong Zhang } 916d20bfe6fSHong Zhang } 917d20bfe6fSHong Zhang 918d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 919d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 920d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 921d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 922d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 923d20bfe6fSHong Zhang 924eb9c0419SKris Buschelman PetscFunctionReturn(0); 925eb9c0419SKris Buschelman } 9260e36024fSHong Zhang 927a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 928e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 929e240928fSHong Zhang #undef __FUNCT__ 930e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 931e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 932e240928fSHong Zhang { 933e240928fSHong Zhang PetscErrorCode ierr; 934e240928fSHong Zhang PetscInt flops=0; 935e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 936e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 937e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 938e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 939e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 940e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 941e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 942e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 943e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 944e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 945e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 946e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 947e240928fSHong Zhang 948e240928fSHong Zhang PetscFunctionBegin; 949e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 950a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 951e240928fSHong Zhang } 952a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 953e240928fSHong Zhang 954e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 955e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 956e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 957e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 958e240928fSHong Zhang apjdense = apj + cn; 959e240928fSHong Zhang 960e240928fSHong Zhang /* Clear old values in C */ 961e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 962e240928fSHong Zhang 963e240928fSHong Zhang for (i=0;i<am;i++) { 964e240928fSHong Zhang /* Form i-th sparse row of A*P */ 965e240928fSHong Zhang apnzj = 0; 966e240928fSHong Zhang /* diagonal portion of A */ 967e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 968e240928fSHong Zhang for (j=0;j<anzi;j++) { 969e240928fSHong Zhang prow = *adj; 970e240928fSHong Zhang adj++; 971e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 972e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 973e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 974e240928fSHong Zhang for (k=0;k<pnzj;k++) { 975e240928fSHong Zhang if (!apjdense[pjj[k]]) { 976e240928fSHong Zhang apjdense[pjj[k]] = -1; 977e240928fSHong Zhang apj[apnzj++] = pjj[k]; 978e240928fSHong Zhang } 979e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 980e240928fSHong Zhang } 981e240928fSHong Zhang flops += 2*pnzj; 982e240928fSHong Zhang ada++; 983e240928fSHong Zhang } 984e240928fSHong Zhang /* off-diagonal portion of A */ 985e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 986e240928fSHong Zhang for (j=0;j<anzi;j++) { 987e240928fSHong Zhang col = a->garray[*aoj]; 988e240928fSHong Zhang if (col < cstart){ 989e240928fSHong Zhang prow = *aoj; 990e240928fSHong Zhang } else if (col >= cend){ 991e240928fSHong Zhang prow = *aoj; 992e240928fSHong Zhang } else { 993e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 994e240928fSHong Zhang } 995e240928fSHong Zhang aoj++; 996e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 997e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 998e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 999e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1000e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1001e240928fSHong Zhang apjdense[pjj[k]] = -1; 1002e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1003e240928fSHong Zhang } 1004e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 1005e240928fSHong Zhang } 1006e240928fSHong Zhang flops += 2*pnzj; 1007e240928fSHong Zhang aoa++; 1008e240928fSHong Zhang } 1009e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 1010e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1011e240928fSHong Zhang 1012e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1013e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 1014e240928fSHong Zhang for (j=0;j<pnzi;j++) { 1015e240928fSHong Zhang nextap = 0; 1016e240928fSHong Zhang crow = *pJ++; 1017e240928fSHong Zhang cjj = cj + ci[crow]; 1018e240928fSHong Zhang caj = ca + ci[crow]; 1019e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 1020e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 1021e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 1022e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 1023e240928fSHong Zhang } 1024e240928fSHong Zhang } 1025e240928fSHong Zhang flops += 2*apnzj; 1026e240928fSHong Zhang pA++; 1027e240928fSHong Zhang } 1028e240928fSHong Zhang 1029e240928fSHong Zhang /* Zero the current row info for A*P */ 1030e240928fSHong Zhang for (j=0;j<apnzj;j++) { 1031e240928fSHong Zhang apa[apj[j]] = 0.; 1032e240928fSHong Zhang apjdense[apj[j]] = 0; 1033e240928fSHong Zhang } 1034e240928fSHong Zhang } 1035e240928fSHong Zhang 1036e240928fSHong Zhang /* Assemble the final matrix and clean up */ 1037e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1038e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1039e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 1040e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1041a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 1042e240928fSHong Zhang PetscFunctionReturn(0); 1043e240928fSHong Zhang } 1044e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 1045e240928fSHong Zhang #undef __FUNCT__ 1046e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1047e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1048e240928fSHong Zhang { 1049e240928fSHong Zhang PetscErrorCode ierr; 1050e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1051e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1052e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1053e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1054e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1055e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 10566abd8857SHong Zhang PetscInt aN=A->N,am=A->m,pN=P_loc->N; 1057e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1058e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 1059e240928fSHong Zhang MatScalar *ca; 1060e240928fSHong Zhang PetscBT lnkbt; 1061e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 1062e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1063e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1064e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1065e240928fSHong Zhang 1066e240928fSHong Zhang PetscFunctionBegin; 1067e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 1068e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1069e240928fSHong Zhang } 1070e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1071e240928fSHong Zhang 1072e240928fSHong Zhang prend = prstart + pm; 1073e240928fSHong Zhang 1074e240928fSHong Zhang /* get ij structure of P_loc^T */ 1075e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1076e240928fSHong Zhang ptJ=ptj; 1077e240928fSHong Zhang 1078e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 1079e240928fSHong Zhang /* free space for accumulating nonzero column info */ 1080e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1081e240928fSHong Zhang ci[0] = 0; 1082e240928fSHong Zhang 10836abd8857SHong Zhang ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 10846abd8857SHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 10856abd8857SHong Zhang ptasparserow_loc = ptadenserow_loc + aN; 10866abd8857SHong Zhang ptadenserow_oth = ptasparserow_loc + aN; 10876abd8857SHong Zhang ptasparserow_oth = ptadenserow_oth + aN; 1088e240928fSHong Zhang 1089e240928fSHong Zhang /* create and initialize a linked list */ 1090e240928fSHong Zhang nlnk = pN+1; 1091e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1092e240928fSHong Zhang 10936abd8857SHong Zhang /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 1094e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1095e240928fSHong Zhang nnz = adi[am] + aoi[am]; 10966abd8857SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 1097e240928fSHong Zhang current_space = free_space; 1098e240928fSHong Zhang 1099e240928fSHong Zhang /* determine symbolic info for each row of C: */ 1100e240928fSHong Zhang for (i=0;i<pN;i++) { 1101e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 1102e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 1103e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 1104e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 1105e240928fSHong Zhang arow = *ptJ++; 1106e240928fSHong Zhang /* diagonal portion of A */ 1107e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 1108e240928fSHong Zhang ajj = adj + adi[arow]; 1109e240928fSHong Zhang for (k=0;k<anzj;k++) { 1110e240928fSHong Zhang col = ajj[k]+prstart; 1111e240928fSHong Zhang if (!ptadenserow_loc[col]) { 1112e240928fSHong Zhang ptadenserow_loc[col] = -1; 1113e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 1114e240928fSHong Zhang } 1115e240928fSHong Zhang } 1116e240928fSHong Zhang /* off-diagonal portion of A */ 1117e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 1118e240928fSHong Zhang ajj = aoj + aoi[arow]; 1119e240928fSHong Zhang for (k=0;k<anzj;k++) { 1120e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 1121e240928fSHong Zhang if (col < cstart){ 1122e240928fSHong Zhang col = ajj[k]; 1123e240928fSHong Zhang } else if (col >= cend){ 1124e240928fSHong Zhang col = ajj[k] + pm; 1125e240928fSHong Zhang } else { 1126e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1127e240928fSHong Zhang } 1128e240928fSHong Zhang if (!ptadenserow_oth[col]) { 1129e240928fSHong Zhang ptadenserow_oth[col] = -1; 1130e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 1131e240928fSHong Zhang } 1132e240928fSHong Zhang } 1133e240928fSHong Zhang } 1134e240928fSHong Zhang 1135e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1136e240928fSHong Zhang cnzi = 0; 1137e240928fSHong Zhang /* rows of P_loc */ 1138e240928fSHong Zhang ptaj = ptasparserow_loc; 1139e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 1140e240928fSHong Zhang prow = *ptaj++; 1141e240928fSHong Zhang prow -= prstart; /* rm */ 1142e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1143e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1144e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1145e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1146e240928fSHong Zhang cnzi += nlnk; 1147e240928fSHong Zhang } 1148e240928fSHong Zhang /* rows of P_oth */ 1149e240928fSHong Zhang ptaj = ptasparserow_oth; 1150e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 1151e240928fSHong Zhang prow = *ptaj++; 1152e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 1153e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1154e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1155e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1156e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1157e240928fSHong Zhang cnzi += nlnk; 1158e240928fSHong Zhang } 1159e240928fSHong Zhang 1160e240928fSHong Zhang /* If free space is not available, make more free space */ 1161e240928fSHong Zhang /* Double the amount of total space in the list */ 1162e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 1163e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1164e240928fSHong Zhang } 1165e240928fSHong Zhang 1166e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 1167e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1168e240928fSHong Zhang current_space->array += cnzi; 1169e240928fSHong Zhang current_space->local_used += cnzi; 1170e240928fSHong Zhang current_space->local_remaining -= cnzi; 1171e240928fSHong Zhang 1172e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 1173e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 1174e240928fSHong Zhang } 1175e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 1176e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 1177e240928fSHong Zhang } 1178e240928fSHong Zhang 1179e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1180e240928fSHong Zhang /* For now, we will recompute what is needed. */ 1181e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 1182e240928fSHong Zhang } 1183e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1184e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 1185e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 1186e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1187e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1188e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1189e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1190e240928fSHong Zhang 1191e240928fSHong Zhang /* Allocate space for ca */ 1192e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1193e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1194e240928fSHong Zhang 1195e240928fSHong Zhang /* put together the new matrix */ 1196e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1197e240928fSHong Zhang 1198e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1199e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1200e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 1201e240928fSHong Zhang c->freedata = PETSC_TRUE; 1202e240928fSHong Zhang c->nonew = 0; 1203e240928fSHong Zhang 1204e240928fSHong Zhang /* Clean up. */ 1205e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1206a61c8c0fSHong Zhang 1207e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1208e240928fSHong Zhang PetscFunctionReturn(0); 1209e240928fSHong Zhang } 1210