1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9eb9c0419SKris Buschelman 10eb9c0419SKris Buschelman #undef __FUNCT__ 119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 124d3841fdSKris Buschelman /*@ 139af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 144d3841fdSKris Buschelman 154d3841fdSKris Buschelman Collective on Mat 164d3841fdSKris Buschelman 174d3841fdSKris Buschelman Input Parameters: 184d3841fdSKris Buschelman + A - the matrix 19f747e1aeSHong Zhang . P - the projection matrix 20f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 224d3841fdSKris Buschelman 234d3841fdSKris Buschelman Output Parameters: 244d3841fdSKris Buschelman . C - the product matrix 254d3841fdSKris Buschelman 264d3841fdSKris Buschelman Notes: 274d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 284d3841fdSKris Buschelman 294d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 304d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 314d3841fdSKris Buschelman 324d3841fdSKris Buschelman Level: intermediate 334d3841fdSKris Buschelman 349af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 354d3841fdSKris Buschelman @*/ 36dfbe8321SBarry Smith PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37dfbe8321SBarry Smith PetscErrorCode ierr; 38534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40eb9c0419SKris Buschelman 41eb9c0419SKris Buschelman PetscFunctionBegin; 429af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 439af31e4aSHong Zhang PetscValidType(A,1); 449af31e4aSHong Zhang MatPreallocated(A); 459af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 469af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 479af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 489af31e4aSHong Zhang PetscValidType(P,2); 499af31e4aSHong Zhang MatPreallocated(P); 509af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 519af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 529af31e4aSHong Zhang PetscValidPointer(C,3); 53534c1384SKris Buschelman 549af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55eb9c0419SKris Buschelman 569af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57eb9c0419SKris Buschelman 58534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 59534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60534c1384SKris Buschelman fA = A->ops->ptap; 61534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62534c1384SKris Buschelman fP = P->ops->ptap; 63534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65534c1384SKris Buschelman 669af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 689af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69eb9c0419SKris Buschelman PetscFunctionReturn(0); 70eb9c0419SKris Buschelman } 71eb9c0419SKris Buschelman 72eb9c0419SKris Buschelman #undef __FUNCT__ 73ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 74ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 75ff134f7aSHong Zhang { 76170ef064SHong Zhang #ifdef TMP 77ff134f7aSHong Zhang PetscErrorCode ierr; 78ff134f7aSHong Zhang Mat C_mpi,AP_seq,P_seq,P_subseq,*psubseq; 79ff134f7aSHong Zhang Mat_MPIAIJ *p = (Mat_MPIAIJ*)P->data; 80ff134f7aSHong Zhang Mat_MatMatMultMPI *mult; 81ff134f7aSHong Zhang int i,prow,prstart,prend,m=P->m,pncols; 82ff134f7aSHong Zhang const int *pcols; 83ff134f7aSHong Zhang const PetscScalar *pvals; 8432dcc486SBarry Smith PetscMPIInt rank; 85ff134f7aSHong Zhang 86ff134f7aSHong Zhang PetscFunctionBegin; 87ff134f7aSHong Zhang ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 88ff134f7aSHong Zhang 89ff134f7aSHong Zhang ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,P,fill,&C_mpi);CHKERRQ(ierr); 90ff134f7aSHong Zhang mult = (Mat_MatMatMultMPI*)C_mpi->spptr; 91ff134f7aSHong Zhang P_seq = mult->bseq[0]; 92ff134f7aSHong Zhang AP_seq = mult->C_seq; 93ff134f7aSHong Zhang prstart = mult->brstart; 94ff134f7aSHong Zhang prend = prstart + m; 95ff134f7aSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," [%d] prstart: %d, prend: %d, dim of P_seq: %d, %d\n",rank,prstart,prend,P_seq->m,P_seq->n); 96ff134f7aSHong Zhang 97ff134f7aSHong Zhang /* get seq matrix P_subseq by taking local rows of P */ 98ff134f7aSHong Zhang IS isrow,iscol; 99ff134f7aSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 100ff134f7aSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P_seq->n,0,1,&iscol);CHKERRQ(ierr); 101ff134f7aSHong Zhang ierr = MatGetSubMatrices(P_seq,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psubseq);CHKERRQ(ierr); 102ff134f7aSHong Zhang P_subseq = psubseq[0]; 103ff134f7aSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," [%d] dim of P_subseq: %d, %d\n",rank,P_subseq->m,P_subseq->n); 104ff134f7aSHong Zhang 105ff134f7aSHong Zhang /* Compute P_subseq^T*C_seq using outer product (P_loc^T)[:,i]*C_seq[i,:]. */ 106ff134f7aSHong Zhang for (i=0;i<m;i++) { 107ff134f7aSHong Zhang prow = prstart + i; 108ff134f7aSHong Zhang ierr = MatGetRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr); 109ff134f7aSHong Zhang ierr = MatRestoreRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr); 110ff134f7aSHong Zhang } 111ff134f7aSHong Zhang 112ff134f7aSHong Zhang *C = C_mpi; /* to be removed! */ 113170ef064SHong Zhang #endif /* TMP */ 114ff134f7aSHong Zhang PetscFunctionReturn(0); 115ff134f7aSHong Zhang } 116ff134f7aSHong Zhang 117ff134f7aSHong Zhang #undef __FUNCT__ 118ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 119ff134f7aSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 120ff134f7aSHong Zhang { 121ff134f7aSHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 122ff134f7aSHong Zhang PetscErrorCode ierr; 123ff134f7aSHong Zhang 124ff134f7aSHong Zhang PetscFunctionBegin; 125ff134f7aSHong Zhang PetscFunctionReturn(0); 126ff134f7aSHong Zhang } 127ff134f7aSHong Zhang 128ff134f7aSHong Zhang #undef __FUNCT__ 129ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 130ff134f7aSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 131ff134f7aSHong Zhang { 132ff134f7aSHong Zhang Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 133ff134f7aSHong Zhang PetscErrorCode ierr; 134ff134f7aSHong Zhang 135ff134f7aSHong Zhang PetscFunctionBegin; 136ff134f7aSHong Zhang PetscFunctionReturn(0); 137ff134f7aSHong Zhang } 138ff134f7aSHong Zhang 139ff134f7aSHong Zhang #undef __FUNCT__ 1409af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 141dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1429af31e4aSHong Zhang { 143dfbe8321SBarry Smith PetscErrorCode ierr; 1449af31e4aSHong Zhang PetscFunctionBegin; 1459af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 146*d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1479af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 148*d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1499af31e4aSHong Zhang } 150*d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1519af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 152*d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1539af31e4aSHong Zhang PetscFunctionReturn(0); 1549af31e4aSHong Zhang } 1559af31e4aSHong Zhang 1569af31e4aSHong Zhang #undef __FUNCT__ 1579af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 1586849ba73SBarry Smith /* 1599af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 1604d3841fdSKris Buschelman 1614d3841fdSKris Buschelman Collective on Mat 1624d3841fdSKris Buschelman 1634d3841fdSKris Buschelman Input Parameters: 1644d3841fdSKris Buschelman + A - the matrix 1654d3841fdSKris Buschelman - P - the projection matrix 1664d3841fdSKris Buschelman 1674d3841fdSKris Buschelman Output Parameters: 1684d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 1694d3841fdSKris Buschelman 1704d3841fdSKris Buschelman Notes: 1714d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 1724d3841fdSKris Buschelman 1734d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 1744d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 1759af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 1764d3841fdSKris Buschelman 1774d3841fdSKris Buschelman Level: intermediate 1784d3841fdSKris Buschelman 1799af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 1806849ba73SBarry Smith */ 181dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 182dfbe8321SBarry Smith PetscErrorCode ierr; 183534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 184534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 185eb9c0419SKris Buschelman 186eb9c0419SKris Buschelman PetscFunctionBegin; 187eb9c0419SKris Buschelman 1884482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 189c9780b6fSBarry Smith PetscValidType(A,1); 190eb9c0419SKris Buschelman MatPreallocated(A); 191eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 192eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 193eb9c0419SKris Buschelman 1944482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 195c9780b6fSBarry Smith PetscValidType(P,2); 196eb9c0419SKris Buschelman MatPreallocated(P); 197eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 198eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 199eb9c0419SKris Buschelman 2004482741eSBarry Smith PetscValidPointer(C,3); 2014482741eSBarry Smith 202eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 203eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 204eb9c0419SKris Buschelman 205534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 206534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 207534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 208534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 209534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 210534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 211534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2124d3841fdSKris Buschelman 213534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 214534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 215534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 216eb9c0419SKris Buschelman 217eb9c0419SKris Buschelman PetscFunctionReturn(0); 218eb9c0419SKris Buschelman } 219eb9c0419SKris Buschelman 220f747e1aeSHong Zhang typedef struct { 221f747e1aeSHong Zhang Mat symAP; 222f747e1aeSHong Zhang } Mat_PtAPstruct; 223f747e1aeSHong Zhang 22478a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 22578a80504SBarry Smith 226f747e1aeSHong Zhang #undef __FUNCT__ 227f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 228f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 229f747e1aeSHong Zhang { 230f4a850bbSBarry Smith PetscErrorCode ierr; 231f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 232f747e1aeSHong Zhang 233f747e1aeSHong Zhang PetscFunctionBegin; 234f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 235f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 23678a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 237f747e1aeSHong Zhang PetscFunctionReturn(0); 238f747e1aeSHong Zhang } 239f747e1aeSHong Zhang 240eb9c0419SKris Buschelman #undef __FUNCT__ 2419af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 242dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 243dfbe8321SBarry Smith PetscErrorCode ierr; 244*d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 245*d20bfe6fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 246*d20bfe6fSHong Zhang int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 247*d20bfe6fSHong Zhang int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 248*d20bfe6fSHong Zhang int an=A->N,am=A->M,pn=P->N,pm=P->M; 249*d20bfe6fSHong Zhang int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 250*d20bfe6fSHong Zhang MatScalar *ca; 251eb9c0419SKris Buschelman 252eb9c0419SKris Buschelman PetscFunctionBegin; 253*d20bfe6fSHong Zhang /* Get ij structure of P^T */ 254eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 255*d20bfe6fSHong Zhang ptJ=ptj; 256eb9c0419SKris Buschelman 257*d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 258*d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 259*d20bfe6fSHong Zhang ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 260*d20bfe6fSHong Zhang ci[0] = 0; 261eb9c0419SKris Buschelman 262*d20bfe6fSHong Zhang ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 263*d20bfe6fSHong Zhang ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 264*d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 265*d20bfe6fSHong Zhang denserow = ptasparserow + an; 266*d20bfe6fSHong Zhang sparserow = denserow + pn; 267eb9c0419SKris Buschelman 268*d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 269*d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 270*d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 271*d20bfe6fSHong Zhang current_space = free_space; 272*d20bfe6fSHong Zhang 273*d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 274*d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 275*d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 276*d20bfe6fSHong Zhang ptanzi = 0; 277*d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 278*d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 279*d20bfe6fSHong Zhang arow = *ptJ++; 280*d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 281*d20bfe6fSHong Zhang ajj = aj + ai[arow]; 282*d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 283*d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 284*d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 285*d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 286*d20bfe6fSHong Zhang } 287*d20bfe6fSHong Zhang } 288*d20bfe6fSHong Zhang } 289*d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 290*d20bfe6fSHong Zhang ptaj = ptasparserow; 291*d20bfe6fSHong Zhang cnzi = 0; 292*d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 293*d20bfe6fSHong Zhang prow = *ptaj++; 294*d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 295*d20bfe6fSHong Zhang pjj = pj + pi[prow]; 296*d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 297*d20bfe6fSHong Zhang if (!denserow[pjj[k]]) { 298*d20bfe6fSHong Zhang denserow[pjj[k]] = -1; 299*d20bfe6fSHong Zhang sparserow[cnzi++] = pjj[k]; 300*d20bfe6fSHong Zhang } 301*d20bfe6fSHong Zhang } 302*d20bfe6fSHong Zhang } 303*d20bfe6fSHong Zhang 304*d20bfe6fSHong Zhang /* sort sparserow */ 305*d20bfe6fSHong Zhang ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 306*d20bfe6fSHong Zhang 307*d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 308*d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 309*d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 310*d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 311*d20bfe6fSHong Zhang } 312*d20bfe6fSHong Zhang 313*d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 314*d20bfe6fSHong Zhang ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 315*d20bfe6fSHong Zhang current_space->array += cnzi; 316*d20bfe6fSHong Zhang current_space->local_used += cnzi; 317*d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 318*d20bfe6fSHong Zhang 319*d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 320*d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 321*d20bfe6fSHong Zhang } 322*d20bfe6fSHong Zhang for (j=0;j<cnzi;j++) { 323*d20bfe6fSHong Zhang denserow[sparserow[j]] = 0; 324*d20bfe6fSHong Zhang } 325*d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 326*d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 327*d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 328*d20bfe6fSHong Zhang } 329*d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 330*d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 331*d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 332*d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 333*d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 334*d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 335*d20bfe6fSHong Zhang 336*d20bfe6fSHong Zhang /* Allocate space for ca */ 337*d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 338*d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 339*d20bfe6fSHong Zhang 340*d20bfe6fSHong Zhang /* put together the new matrix */ 341*d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 342*d20bfe6fSHong Zhang 343*d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 344*d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 345*d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 346*d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 347*d20bfe6fSHong Zhang c->nonew = 0; 348*d20bfe6fSHong Zhang 349*d20bfe6fSHong Zhang /* Clean up. */ 350*d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 351eb9c0419SKris Buschelman 352eb9c0419SKris Buschelman PetscFunctionReturn(0); 353eb9c0419SKris Buschelman } 354eb9c0419SKris Buschelman 3553985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 3563985e5eaSKris Buschelman EXTERN_C_BEGIN 3573985e5eaSKris Buschelman #undef __FUNCT__ 3589af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 359dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 3605c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 361dfbe8321SBarry Smith PetscErrorCode ierr; 3623985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3633985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3643985e5eaSKris Buschelman Mat P=pp->AIJ; 3653985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 3663985e5eaSKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 3673985e5eaSKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 3683985e5eaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 369fe05a634SKris Buschelman int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 3703985e5eaSKris Buschelman MatScalar *ca; 3713985e5eaSKris Buschelman 3723985e5eaSKris Buschelman PetscFunctionBegin; 3733985e5eaSKris Buschelman /* Start timer */ 3749af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3753985e5eaSKris Buschelman 3763985e5eaSKris Buschelman /* Get ij structure of P^T */ 3773985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3783985e5eaSKris Buschelman 3793985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 3803985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 3813985e5eaSKris Buschelman ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 3823985e5eaSKris Buschelman ci[0] = 0; 3833985e5eaSKris Buschelman 3843985e5eaSKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 3853985e5eaSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 3863985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 3873985e5eaSKris Buschelman denserow = ptasparserow + an; 3883985e5eaSKris Buschelman sparserow = denserow + pn; 3893985e5eaSKris Buschelman 3903985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3913985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3923985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 3933985e5eaSKris Buschelman current_space = free_space; 3943985e5eaSKris Buschelman 3953985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 3963985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 3973985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 3983985e5eaSKris Buschelman ptanzi = 0; 3993985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4003985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4013985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4023985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4033985e5eaSKris Buschelman arow = ptJ[j] + dof; 4043985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4053985e5eaSKris Buschelman ajj = aj + ai[arow]; 4063985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4073985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4083985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4093985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4103985e5eaSKris Buschelman } 4113985e5eaSKris Buschelman } 4123985e5eaSKris Buschelman } 4133985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4143985e5eaSKris Buschelman ptaj = ptasparserow; 4153985e5eaSKris Buschelman cnzi = 0; 4163985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 417fe05a634SKris Buschelman pdof = *ptaj%dof; 4183985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4193985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4203985e5eaSKris Buschelman pjj = pj + pi[prow]; 4213985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 422fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 423fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 424fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4253985e5eaSKris Buschelman } 4263985e5eaSKris Buschelman } 4273985e5eaSKris Buschelman } 4283985e5eaSKris Buschelman 4293985e5eaSKris Buschelman /* sort sparserow */ 4303985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 4313985e5eaSKris Buschelman 4323985e5eaSKris Buschelman /* If free space is not available, make more free space */ 4333985e5eaSKris Buschelman /* Double the amount of total space in the list */ 4343985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 4353985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4363985e5eaSKris Buschelman } 4373985e5eaSKris Buschelman 4383985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 4393985e5eaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 4403985e5eaSKris Buschelman current_space->array += cnzi; 4413985e5eaSKris Buschelman current_space->local_used += cnzi; 4423985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 4433985e5eaSKris Buschelman 4443985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 4453985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 4463985e5eaSKris Buschelman } 4473985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 4483985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 4493985e5eaSKris Buschelman } 4503985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 4513985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 4523985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 4533985e5eaSKris Buschelman } 4543985e5eaSKris Buschelman } 4553985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 4563985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 4573985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 4583985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 4593985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 4603985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 4613985e5eaSKris Buschelman 4623985e5eaSKris Buschelman /* Allocate space for ca */ 4633985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 4643985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 4653985e5eaSKris Buschelman 4663985e5eaSKris Buschelman /* put together the new matrix */ 4673985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 4683985e5eaSKris Buschelman 4693985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4703985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 4713985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 4723985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 4733985e5eaSKris Buschelman c->nonew = 0; 4743985e5eaSKris Buschelman 4753985e5eaSKris Buschelman /* Clean up. */ 4763985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4773985e5eaSKris Buschelman 4789af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4793985e5eaSKris Buschelman PetscFunctionReturn(0); 4803985e5eaSKris Buschelman } 4813985e5eaSKris Buschelman EXTERN_C_END 4823985e5eaSKris Buschelman 483eb9c0419SKris Buschelman #undef __FUNCT__ 4849af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 4856849ba73SBarry Smith /* 4869af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 4874d3841fdSKris Buschelman 4884d3841fdSKris Buschelman Collective on Mat 4894d3841fdSKris Buschelman 4904d3841fdSKris Buschelman Input Parameters: 4914d3841fdSKris Buschelman + A - the matrix 4924d3841fdSKris Buschelman - P - the projection matrix 4934d3841fdSKris Buschelman 4944d3841fdSKris Buschelman Output Parameters: 4954d3841fdSKris Buschelman . C - the product matrix 4964d3841fdSKris Buschelman 4974d3841fdSKris Buschelman Notes: 4989af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 4994d3841fdSKris Buschelman the user using MatDeatroy(). 5004d3841fdSKris Buschelman 501170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 502170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5034d3841fdSKris Buschelman 5044d3841fdSKris Buschelman Level: intermediate 5054d3841fdSKris Buschelman 5069af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5076849ba73SBarry Smith */ 508dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 509dfbe8321SBarry Smith PetscErrorCode ierr; 510534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 511534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 512eb9c0419SKris Buschelman 513eb9c0419SKris Buschelman PetscFunctionBegin; 514eb9c0419SKris Buschelman 5154482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 516c9780b6fSBarry Smith PetscValidType(A,1); 517eb9c0419SKris Buschelman MatPreallocated(A); 518eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 519eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 520eb9c0419SKris Buschelman 5214482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 522c9780b6fSBarry Smith PetscValidType(P,2); 523eb9c0419SKris Buschelman MatPreallocated(P); 524eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 525eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 526eb9c0419SKris Buschelman 5274482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 528c9780b6fSBarry Smith PetscValidType(C,3); 529eb9c0419SKris Buschelman MatPreallocated(C); 530eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 531eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 532eb9c0419SKris Buschelman 533eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 534eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 535eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 536eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 537eb9c0419SKris Buschelman 538534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 539534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 540534c1384SKris Buschelman fA = A->ops->ptapnumeric; 541534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 542534c1384SKris Buschelman fP = P->ops->ptapnumeric; 543534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 544534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5454d3841fdSKris Buschelman 546534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 547534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 548534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 549eb9c0419SKris Buschelman 550eb9c0419SKris Buschelman PetscFunctionReturn(0); 551eb9c0419SKris Buschelman } 552eb9c0419SKris Buschelman 553eb9c0419SKris Buschelman #undef __FUNCT__ 5549af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 555dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 556dfbe8321SBarry Smith { 557dfbe8321SBarry Smith PetscErrorCode ierr; 558*d20bfe6fSHong Zhang int flops=0; 559*d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 560*d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 561*d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 562*d20bfe6fSHong Zhang int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 563*d20bfe6fSHong Zhang int *ci=c->i,*cj=c->j,*cjj; 564*d20bfe6fSHong Zhang int am=A->M,cn=C->N,cm=C->M; 565*d20bfe6fSHong Zhang int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 566*d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 567eb9c0419SKris Buschelman 568eb9c0419SKris Buschelman PetscFunctionBegin; 569*d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 570*d20bfe6fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 571*d20bfe6fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 572eb9c0419SKris Buschelman 573*d20bfe6fSHong Zhang apj = (int *)(apa + cn); 574*d20bfe6fSHong Zhang apjdense = apj + cn; 575*d20bfe6fSHong Zhang 576*d20bfe6fSHong Zhang /* Clear old values in C */ 577*d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 578*d20bfe6fSHong Zhang 579*d20bfe6fSHong Zhang for (i=0;i<am;i++) { 580*d20bfe6fSHong Zhang /* Form sparse row of A*P */ 581*d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 582*d20bfe6fSHong Zhang apnzj = 0; 583*d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 584*d20bfe6fSHong Zhang prow = *aj++; 585*d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 586*d20bfe6fSHong Zhang pjj = pj + pi[prow]; 587*d20bfe6fSHong Zhang paj = pa + pi[prow]; 588*d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 589*d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 590*d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 591*d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 592*d20bfe6fSHong Zhang } 593*d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 594*d20bfe6fSHong Zhang } 595*d20bfe6fSHong Zhang flops += 2*pnzj; 596*d20bfe6fSHong Zhang aa++; 597*d20bfe6fSHong Zhang } 598*d20bfe6fSHong Zhang 599*d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 600*d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 601*d20bfe6fSHong Zhang 602*d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 603*d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 604*d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 605*d20bfe6fSHong Zhang nextap = 0; 606*d20bfe6fSHong Zhang crow = *pJ++; 607*d20bfe6fSHong Zhang cjj = cj + ci[crow]; 608*d20bfe6fSHong Zhang caj = ca + ci[crow]; 609*d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 610*d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 611*d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 612*d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 613*d20bfe6fSHong Zhang } 614*d20bfe6fSHong Zhang } 615*d20bfe6fSHong Zhang flops += 2*apnzj; 616*d20bfe6fSHong Zhang pA++; 617*d20bfe6fSHong Zhang } 618*d20bfe6fSHong Zhang 619*d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 620*d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 621*d20bfe6fSHong Zhang apa[apj[j]] = 0.; 622*d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 623*d20bfe6fSHong Zhang } 624*d20bfe6fSHong Zhang } 625*d20bfe6fSHong Zhang 626*d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 627*d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 628*d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 629*d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 630*d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 631*d20bfe6fSHong Zhang 632eb9c0419SKris Buschelman PetscFunctionReturn(0); 633eb9c0419SKris Buschelman } 634