1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74b1d57f15SBarry Smith EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 75b1d57f15SBarry Smith EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 76b1d57f15SBarry Smith EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat); 77b90dcfe3SHong Zhang 7822e3da61SHong Zhang EXTERN PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 7922e3da61SHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 8022e3da61SHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscInt,PetscInt,Mat); 8122e3da61SHong Zhang 82eb9c0419SKris Buschelman #undef __FUNCT__ 83ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 84ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 85ff134f7aSHong Zhang { 86ff134f7aSHong Zhang PetscErrorCode ierr; 87b90dcfe3SHong Zhang 88b90dcfe3SHong Zhang PetscFunctionBegin; 89b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 904c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 91b90dcfe3SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 924c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 93b90dcfe3SHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 944c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 95b90dcfe3SHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 964c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 97b90dcfe3SHong Zhang } else { 98b90dcfe3SHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 99b90dcfe3SHong Zhang } 100b90dcfe3SHong Zhang PetscFunctionReturn(0); 101b90dcfe3SHong Zhang } 102b90dcfe3SHong Zhang 103b90dcfe3SHong Zhang #undef __FUNCT__ 104b90dcfe3SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 105b90dcfe3SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 106b90dcfe3SHong Zhang { 107b90dcfe3SHong Zhang PetscErrorCode ierr; 10822e3da61SHong Zhang Mat P_seq,C_seq; /* A_loc */ 109b1d57f15SBarry Smith PetscInt prstart,prend,m=P->m; 11022e3da61SHong Zhang /* IS isrowp,iscolp; */ 111ff134f7aSHong Zhang 112ff134f7aSHong Zhang PetscFunctionBegin; 11325616d81SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 11422e3da61SHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 11522e3da61SHong Zhang /* 116b90dcfe3SHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 11722e3da61SHong Zhang ierr = ISDestroy(iscolp);CHKERRQ(ierr); */ 118ff134f7aSHong Zhang 11925616d81SHong Zhang /* get A_loc = submatrix of A by taking all local rows of A */ 12022e3da61SHong Zhang /* 121b90dcfe3SHong Zhang ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 12222e3da61SHong Zhang */ 12322e3da61SHong Zhang /* ierr = ISDestroy(isrowp);CHKERRQ(ierr); */ 1240e36024fSHong Zhang 12525616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 126ff134f7aSHong Zhang prend = prstart + m; 12722e3da61SHong Zhang ierr = MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 12822e3da61SHong Zhang /* 129b90dcfe3SHong Zhang ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 13022e3da61SHong Zhang */ 13125616d81SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 13222e3da61SHong Zhang /* ierr = MatDestroy(A_loc);CHKERRQ(ierr); */ 133b90dcfe3SHong Zhang 134b90dcfe3SHong Zhang /* add C_seq into mpi C */ 135b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 136b90dcfe3SHong Zhang 137ff134f7aSHong Zhang PetscFunctionReturn(0); 138ff134f7aSHong Zhang } 139ff134f7aSHong Zhang 140ff134f7aSHong Zhang #undef __FUNCT__ 141ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 142b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 143ff134f7aSHong Zhang { 144b90dcfe3SHong Zhang PetscErrorCode ierr; 145b90dcfe3SHong Zhang Mat P_seq,A_loc,C_seq; 146b1d57f15SBarry Smith PetscInt prstart,prend,m=P->m; 147b90dcfe3SHong Zhang IS isrowp,iscolp; 148671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 149671beff6SHong Zhang PetscObjectContainer container; 150ff134f7aSHong Zhang 151ff134f7aSHong Zhang PetscFunctionBegin; 152671beff6SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 153671beff6SHong Zhang if (container) { 1547f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 1557f79fd58SMatthew Knepley } else { 1567f79fd58SMatthew Knepley SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 157671beff6SHong Zhang } 158671beff6SHong Zhang 159b90dcfe3SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 160b90dcfe3SHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 161b90dcfe3SHong Zhang ierr = ISDestroy(iscolp);CHKERRQ(ierr); 162ff134f7aSHong Zhang 163b90dcfe3SHong Zhang /* get A_loc = submatrix of A by taking all local rows of A */ 164b90dcfe3SHong Zhang ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 165b90dcfe3SHong Zhang ierr = ISDestroy(isrowp);CHKERRQ(ierr); 166ff134f7aSHong Zhang 167b90dcfe3SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 168b90dcfe3SHong Zhang prend = prstart + m; 169b90dcfe3SHong Zhang C_seq = merge->C_seq; 170b90dcfe3SHong Zhang ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 171b90dcfe3SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 172b90dcfe3SHong Zhang ierr = MatDestroy(A_loc);CHKERRQ(ierr); 173b90dcfe3SHong Zhang 174b90dcfe3SHong Zhang /* add C_seq into mpi C */ 175b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 176b90dcfe3SHong Zhang 177ff134f7aSHong Zhang PetscFunctionReturn(0); 178ff134f7aSHong Zhang } 179ff134f7aSHong Zhang 180ff134f7aSHong Zhang #undef __FUNCT__ 1819af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 182dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1839af31e4aSHong Zhang { 184dfbe8321SBarry Smith PetscErrorCode ierr; 185b1d57f15SBarry Smith 1869af31e4aSHong Zhang PetscFunctionBegin; 1879af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 188d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1899af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 190d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1919af31e4aSHong Zhang } 192d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1939af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 194d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1959af31e4aSHong Zhang PetscFunctionReturn(0); 1969af31e4aSHong Zhang } 1979af31e4aSHong Zhang 1989af31e4aSHong Zhang #undef __FUNCT__ 1999af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 2006849ba73SBarry Smith /* 2019af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2024d3841fdSKris Buschelman 2034d3841fdSKris Buschelman Collective on Mat 2044d3841fdSKris Buschelman 2054d3841fdSKris Buschelman Input Parameters: 2064d3841fdSKris Buschelman + A - the matrix 2074d3841fdSKris Buschelman - P - the projection matrix 2084d3841fdSKris Buschelman 2094d3841fdSKris Buschelman Output Parameters: 2104d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2114d3841fdSKris Buschelman 2124d3841fdSKris Buschelman Notes: 2134d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2144d3841fdSKris Buschelman 2154d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2164d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2179af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2184d3841fdSKris Buschelman 2194d3841fdSKris Buschelman Level: intermediate 2204d3841fdSKris Buschelman 2219af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2226849ba73SBarry Smith */ 22355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 22455a3bba9SHong Zhang { 225dfbe8321SBarry Smith PetscErrorCode ierr; 226534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 227534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 228eb9c0419SKris Buschelman 229eb9c0419SKris Buschelman PetscFunctionBegin; 230eb9c0419SKris Buschelman 2314482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 232c9780b6fSBarry Smith PetscValidType(A,1); 233eb9c0419SKris Buschelman MatPreallocated(A); 234eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 235eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 236eb9c0419SKris Buschelman 2374482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 238c9780b6fSBarry Smith PetscValidType(P,2); 239eb9c0419SKris Buschelman MatPreallocated(P); 240eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 241eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 242eb9c0419SKris Buschelman 2434482741eSBarry Smith PetscValidPointer(C,3); 2444482741eSBarry Smith 245eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 246eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 247eb9c0419SKris Buschelman 248534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 249534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 250534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 251534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 252534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 253534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 254534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2554d3841fdSKris Buschelman 256534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 257534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 258534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 259eb9c0419SKris Buschelman 260eb9c0419SKris Buschelman PetscFunctionReturn(0); 261eb9c0419SKris Buschelman } 262eb9c0419SKris Buschelman 263f747e1aeSHong Zhang typedef struct { 264f747e1aeSHong Zhang Mat symAP; 265f747e1aeSHong Zhang } Mat_PtAPstruct; 266f747e1aeSHong Zhang 26778a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 26878a80504SBarry Smith 269f747e1aeSHong Zhang #undef __FUNCT__ 270f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 271f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 272f747e1aeSHong Zhang { 273f4a850bbSBarry Smith PetscErrorCode ierr; 274f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 275f747e1aeSHong Zhang 276f747e1aeSHong Zhang PetscFunctionBegin; 277f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 278f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 27978a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 280f747e1aeSHong Zhang PetscFunctionReturn(0); 281f747e1aeSHong Zhang } 282f747e1aeSHong Zhang 283eb9c0419SKris Buschelman #undef __FUNCT__ 2849af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 28555a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 28655a3bba9SHong Zhang { 287dfbe8321SBarry Smith PetscErrorCode ierr; 288d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 289d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 29055a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 291b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 29255a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 293b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 294d20bfe6fSHong Zhang MatScalar *ca; 29555a3bba9SHong Zhang PetscBT lnkbt; 296eb9c0419SKris Buschelman 297eb9c0419SKris Buschelman PetscFunctionBegin; 298d20bfe6fSHong Zhang /* Get ij structure of P^T */ 299eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 300d20bfe6fSHong Zhang ptJ=ptj; 301eb9c0419SKris Buschelman 302d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 303d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 30455a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 305d20bfe6fSHong Zhang ci[0] = 0; 306eb9c0419SKris Buschelman 30755a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 30855a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 309d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 31055a3bba9SHong Zhang 31155a3bba9SHong Zhang /* create and initialize a linked list */ 31255a3bba9SHong Zhang nlnk = pn+1; 31355a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 314eb9c0419SKris Buschelman 315d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 316d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 317d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 318d20bfe6fSHong Zhang current_space = free_space; 319d20bfe6fSHong Zhang 320d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 321d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 322d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 323d20bfe6fSHong Zhang ptanzi = 0; 324d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 325d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 326d20bfe6fSHong Zhang arow = *ptJ++; 327d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 328d20bfe6fSHong Zhang ajj = aj + ai[arow]; 329d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 330d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 331d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 332d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 333d20bfe6fSHong Zhang } 334d20bfe6fSHong Zhang } 335d20bfe6fSHong Zhang } 336d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 337d20bfe6fSHong Zhang ptaj = ptasparserow; 338d20bfe6fSHong Zhang cnzi = 0; 339d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 340d20bfe6fSHong Zhang prow = *ptaj++; 341d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 342d20bfe6fSHong Zhang pjj = pj + pi[prow]; 34355a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 34455a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 34555a3bba9SHong Zhang cnzi += nlnk; 346d20bfe6fSHong Zhang } 347d20bfe6fSHong Zhang 348d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 349d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 350d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 351d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 352d20bfe6fSHong Zhang } 353d20bfe6fSHong Zhang 354d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 35555a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 356d20bfe6fSHong Zhang current_space->array += cnzi; 357d20bfe6fSHong Zhang current_space->local_used += cnzi; 358d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 359d20bfe6fSHong Zhang 360d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 361d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 362d20bfe6fSHong Zhang } 363d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 364d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 365d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 366d20bfe6fSHong Zhang } 367d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 368d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 369d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 37055a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 371d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 372d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 37355a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 374d20bfe6fSHong Zhang 375d20bfe6fSHong Zhang /* Allocate space for ca */ 376d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 377d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 378d20bfe6fSHong Zhang 379d20bfe6fSHong Zhang /* put together the new matrix */ 380d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 381d20bfe6fSHong Zhang 382d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 383d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 384d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 385d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 386d20bfe6fSHong Zhang c->nonew = 0; 387d20bfe6fSHong Zhang 388d20bfe6fSHong Zhang /* Clean up. */ 389d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 390eb9c0419SKris Buschelman 391eb9c0419SKris Buschelman PetscFunctionReturn(0); 392eb9c0419SKris Buschelman } 393eb9c0419SKris Buschelman 3943985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 3953985e5eaSKris Buschelman EXTERN_C_BEGIN 3963985e5eaSKris Buschelman #undef __FUNCT__ 3979af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 39855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 39955a3bba9SHong Zhang { 4005c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 401dfbe8321SBarry Smith PetscErrorCode ierr; 4023985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4033985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4043985e5eaSKris Buschelman Mat P=pp->AIJ; 4053985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 406b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 407b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 408b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 409b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4103985e5eaSKris Buschelman MatScalar *ca; 4113985e5eaSKris Buschelman 4123985e5eaSKris Buschelman PetscFunctionBegin; 4133985e5eaSKris Buschelman /* Start timer */ 4149af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4153985e5eaSKris Buschelman 4163985e5eaSKris Buschelman /* Get ij structure of P^T */ 4173985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4183985e5eaSKris Buschelman 4193985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4203985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 421b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4223985e5eaSKris Buschelman ci[0] = 0; 4233985e5eaSKris Buschelman 424b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 425b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4263985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4273985e5eaSKris Buschelman denserow = ptasparserow + an; 4283985e5eaSKris Buschelman sparserow = denserow + pn; 4293985e5eaSKris Buschelman 4303985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4313985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4323985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4333985e5eaSKris Buschelman current_space = free_space; 4343985e5eaSKris Buschelman 4353985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4363985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4373985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4383985e5eaSKris Buschelman ptanzi = 0; 4393985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4403985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4413985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4423985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4433985e5eaSKris Buschelman arow = ptJ[j] + dof; 4443985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4453985e5eaSKris Buschelman ajj = aj + ai[arow]; 4463985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4473985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4483985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4493985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4503985e5eaSKris Buschelman } 4513985e5eaSKris Buschelman } 4523985e5eaSKris Buschelman } 4533985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4543985e5eaSKris Buschelman ptaj = ptasparserow; 4553985e5eaSKris Buschelman cnzi = 0; 4563985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 457fe05a634SKris Buschelman pdof = *ptaj%dof; 4583985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4593985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4603985e5eaSKris Buschelman pjj = pj + pi[prow]; 4613985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 462fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 463fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 464fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4653985e5eaSKris Buschelman } 4663985e5eaSKris Buschelman } 4673985e5eaSKris Buschelman } 4683985e5eaSKris Buschelman 4693985e5eaSKris Buschelman /* sort sparserow */ 4703985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 4713985e5eaSKris Buschelman 4723985e5eaSKris Buschelman /* If free space is not available, make more free space */ 4733985e5eaSKris Buschelman /* Double the amount of total space in the list */ 4743985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 4753985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4763985e5eaSKris Buschelman } 4773985e5eaSKris Buschelman 4783985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 479b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 4803985e5eaSKris Buschelman current_space->array += cnzi; 4813985e5eaSKris Buschelman current_space->local_used += cnzi; 4823985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 4833985e5eaSKris Buschelman 4843985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 4853985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 4863985e5eaSKris Buschelman } 4873985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 4883985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 4893985e5eaSKris Buschelman } 4903985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 4913985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 4923985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 4933985e5eaSKris Buschelman } 4943985e5eaSKris Buschelman } 4953985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 4963985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 4973985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 498b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4993985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5003985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5013985e5eaSKris Buschelman 5023985e5eaSKris Buschelman /* Allocate space for ca */ 5033985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5043985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5053985e5eaSKris Buschelman 5063985e5eaSKris Buschelman /* put together the new matrix */ 5073985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5083985e5eaSKris Buschelman 5093985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5103985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5113985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5123985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5133985e5eaSKris Buschelman c->nonew = 0; 5143985e5eaSKris Buschelman 5153985e5eaSKris Buschelman /* Clean up. */ 5163985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5173985e5eaSKris Buschelman 5189af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5193985e5eaSKris Buschelman PetscFunctionReturn(0); 5203985e5eaSKris Buschelman } 5213985e5eaSKris Buschelman EXTERN_C_END 5223985e5eaSKris Buschelman 523eb9c0419SKris Buschelman #undef __FUNCT__ 5249af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 5256849ba73SBarry Smith /* 5269af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5274d3841fdSKris Buschelman 5284d3841fdSKris Buschelman Collective on Mat 5294d3841fdSKris Buschelman 5304d3841fdSKris Buschelman Input Parameters: 5314d3841fdSKris Buschelman + A - the matrix 5324d3841fdSKris Buschelman - P - the projection matrix 5334d3841fdSKris Buschelman 5344d3841fdSKris Buschelman Output Parameters: 5354d3841fdSKris Buschelman . C - the product matrix 5364d3841fdSKris Buschelman 5374d3841fdSKris Buschelman Notes: 5389af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5394d3841fdSKris Buschelman the user using MatDeatroy(). 5404d3841fdSKris Buschelman 541170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 542170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5434d3841fdSKris Buschelman 5444d3841fdSKris Buschelman Level: intermediate 5454d3841fdSKris Buschelman 5469af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5476849ba73SBarry Smith */ 54855a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 54955a3bba9SHong Zhang { 550dfbe8321SBarry Smith PetscErrorCode ierr; 551534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 552534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 553eb9c0419SKris Buschelman 554eb9c0419SKris Buschelman PetscFunctionBegin; 555eb9c0419SKris Buschelman 5564482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 557c9780b6fSBarry Smith PetscValidType(A,1); 558eb9c0419SKris Buschelman MatPreallocated(A); 559eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 560eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 561eb9c0419SKris Buschelman 5624482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 563c9780b6fSBarry Smith PetscValidType(P,2); 564eb9c0419SKris Buschelman MatPreallocated(P); 565eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 566eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 567eb9c0419SKris Buschelman 5684482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 569c9780b6fSBarry Smith PetscValidType(C,3); 570eb9c0419SKris Buschelman MatPreallocated(C); 571eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 572eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 573eb9c0419SKris Buschelman 574eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 575eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 576eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 577eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 578eb9c0419SKris Buschelman 579534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 580534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 581534c1384SKris Buschelman fA = A->ops->ptapnumeric; 582534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 583534c1384SKris Buschelman fP = P->ops->ptapnumeric; 584534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 585534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5864d3841fdSKris Buschelman 587534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 588534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 589534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 590eb9c0419SKris Buschelman 591eb9c0419SKris Buschelman PetscFunctionReturn(0); 592eb9c0419SKris Buschelman } 593eb9c0419SKris Buschelman 594eb9c0419SKris Buschelman #undef __FUNCT__ 5959af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 596dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 597dfbe8321SBarry Smith { 598dfbe8321SBarry Smith PetscErrorCode ierr; 599b1d57f15SBarry Smith PetscInt flops=0; 600d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 601d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 602d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 603b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 604b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 605b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 606b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 607d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 608eb9c0419SKris Buschelman 609eb9c0419SKris Buschelman PetscFunctionBegin; 610d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 611b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 612b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 613eb9c0419SKris Buschelman 614b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 615d20bfe6fSHong Zhang apjdense = apj + cn; 616d20bfe6fSHong Zhang 617d20bfe6fSHong Zhang /* Clear old values in C */ 618d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 619d20bfe6fSHong Zhang 620d20bfe6fSHong Zhang for (i=0;i<am;i++) { 621d20bfe6fSHong Zhang /* Form sparse row of A*P */ 622d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 623d20bfe6fSHong Zhang apnzj = 0; 624d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 625d20bfe6fSHong Zhang prow = *aj++; 626d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 627d20bfe6fSHong Zhang pjj = pj + pi[prow]; 628d20bfe6fSHong Zhang paj = pa + pi[prow]; 629d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 630d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 631d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 632d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 633d20bfe6fSHong Zhang } 634d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 635d20bfe6fSHong Zhang } 636d20bfe6fSHong Zhang flops += 2*pnzj; 637d20bfe6fSHong Zhang aa++; 638d20bfe6fSHong Zhang } 639d20bfe6fSHong Zhang 640d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 641d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 642d20bfe6fSHong Zhang 643d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 644d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 645d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 646d20bfe6fSHong Zhang nextap = 0; 647d20bfe6fSHong Zhang crow = *pJ++; 648d20bfe6fSHong Zhang cjj = cj + ci[crow]; 649d20bfe6fSHong Zhang caj = ca + ci[crow]; 650d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 651d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 652d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 653d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 654d20bfe6fSHong Zhang } 655d20bfe6fSHong Zhang } 656d20bfe6fSHong Zhang flops += 2*apnzj; 657d20bfe6fSHong Zhang pA++; 658d20bfe6fSHong Zhang } 659d20bfe6fSHong Zhang 660d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 661d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 662d20bfe6fSHong Zhang apa[apj[j]] = 0.; 663d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 664d20bfe6fSHong Zhang } 665d20bfe6fSHong Zhang } 666d20bfe6fSHong Zhang 667d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 668d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 669d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 670d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 671d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 672d20bfe6fSHong Zhang 673eb9c0419SKris Buschelman PetscFunctionReturn(0); 674eb9c0419SKris Buschelman } 6750e36024fSHong Zhang 6760e36024fSHong Zhang /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 6770e36024fSHong Zhang 6780e36024fSHong Zhang #undef __FUNCT__ 6790e36024fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 6807f79fd58SMatthew Knepley /*@C 681e9b43d0fSSatish Balay MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 682b90dcfe3SHong Zhang used by MatPtAP_MPIAIJ_MPIAIJ() 683b90dcfe3SHong Zhang 684b90dcfe3SHong Zhang Collective on Mat 685b90dcfe3SHong Zhang 686b90dcfe3SHong Zhang Input Parameters: 687b90dcfe3SHong Zhang + A - the matrix in seqaij format 688b90dcfe3SHong Zhang . P - the projection matrix in seqaij format 689b90dcfe3SHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 690b90dcfe3SHong Zhang . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 691b90dcfe3SHong Zhang . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 692b90dcfe3SHong Zhang 693b90dcfe3SHong Zhang Output Parameters: 694b90dcfe3SHong Zhang . C - the product matrix in seqaij format 695b90dcfe3SHong Zhang 696b90dcfe3SHong Zhang Level: developer 697b90dcfe3SHong Zhang 698b90dcfe3SHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 699b90dcfe3SHong Zhang @*/ 700b1d57f15SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 7010e36024fSHong Zhang { 7020e36024fSHong Zhang PetscErrorCode ierr; 703b1d57f15SBarry Smith PetscInt flops=0; 7040e36024fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 7050e36024fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 7060e36024fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 707b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense; 708b1d57f15SBarry Smith PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 709b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 710b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 711b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 7120e36024fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 7130e36024fSHong Zhang 7140e36024fSHong Zhang PetscFunctionBegin; 7150e36024fSHong Zhang pA=p->a+pi[prstart]; 7160e36024fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 717b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 718b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 7190e36024fSHong Zhang 720b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 7210e36024fSHong Zhang apjdense = apj + cn; 7220e36024fSHong Zhang 7230e36024fSHong Zhang /* Clear old values in C */ 7240e36024fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 7250e36024fSHong Zhang 7260e36024fSHong Zhang for (i=0;i<am;i++) { 7270e36024fSHong Zhang /* Form sparse row of A*P */ 7280e36024fSHong Zhang anzi = ai[i+1] - ai[i]; 7290e36024fSHong Zhang apnzj = 0; 7300e36024fSHong Zhang for (j=0;j<anzi;j++) { 7310e36024fSHong Zhang prow = *aj++; 7320e36024fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 7330e36024fSHong Zhang pjj = pj + pi[prow]; 7340e36024fSHong Zhang paj = pa + pi[prow]; 7350e36024fSHong Zhang for (k=0;k<pnzj;k++) { 7360e36024fSHong Zhang if (!apjdense[pjj[k]]) { 7370e36024fSHong Zhang apjdense[pjj[k]] = -1; 7380e36024fSHong Zhang apj[apnzj++] = pjj[k]; 7390e36024fSHong Zhang } 7400e36024fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 7410e36024fSHong Zhang } 7420e36024fSHong Zhang flops += 2*pnzj; 7430e36024fSHong Zhang aa++; 7440e36024fSHong Zhang } 7450e36024fSHong Zhang 7460e36024fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 7470e36024fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 7480e36024fSHong Zhang 7490e36024fSHong Zhang /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 7500e36024fSHong Zhang pnzi = pi[i+1+prstart] - pi[i+prstart]; 7510e36024fSHong Zhang for (j=0;j<pnzi;j++) { 7520e36024fSHong Zhang nextap = 0; 7530e36024fSHong Zhang crow = *pJ++; 7540e36024fSHong Zhang cjj = cj + ci[crow]; 7550e36024fSHong Zhang caj = ca + ci[crow]; 7560e36024fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 7570e36024fSHong Zhang for (k=0;nextap<apnzj;k++) { 7580e36024fSHong Zhang if (cjj[k]==apj[nextap]) { 7590e36024fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 7600e36024fSHong Zhang } 7610e36024fSHong Zhang } 7620e36024fSHong Zhang flops += 2*apnzj; 7630e36024fSHong Zhang pA++; 7640e36024fSHong Zhang } 7650e36024fSHong Zhang 7660e36024fSHong Zhang /* Zero the current row info for A*P */ 7670e36024fSHong Zhang for (j=0;j<apnzj;j++) { 7680e36024fSHong Zhang apa[apj[j]] = 0.; 7690e36024fSHong Zhang apjdense[apj[j]] = 0; 7700e36024fSHong Zhang } 7710e36024fSHong Zhang } 7720e36024fSHong Zhang 7730e36024fSHong Zhang /* Assemble the final matrix and clean up */ 7740e36024fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7750e36024fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7760e36024fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 7770e36024fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 7780e36024fSHong Zhang 77922e3da61SHong Zhang int rank; 78022e3da61SHong Zhang ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 78122e3da61SHong Zhang if (rank==1){ 78222e3da61SHong Zhang ierr = MatView(C,PETSC_VIEWER_STDOUT_SELF); 78322e3da61SHong Zhang } 78422e3da61SHong Zhang 7850e36024fSHong Zhang PetscFunctionReturn(0); 7860e36024fSHong Zhang } 7870e36024fSHong Zhang 7880e36024fSHong Zhang #undef __FUNCT__ 7890e36024fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 790b8374ebeSBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 79155a3bba9SHong Zhang { 7920e36024fSHong Zhang PetscErrorCode ierr; 7930e36024fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 7940e36024fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 79555a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 796b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 79755a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 798b1d57f15SBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 799b8374ebeSBarry Smith PetscInt m = prend - prstart,nlnk,*lnk; 8000e36024fSHong Zhang MatScalar *ca; 8010e36024fSHong Zhang Mat *psub,P_sub; 8020e36024fSHong Zhang IS isrow,iscol; 80355a3bba9SHong Zhang PetscBT lnkbt; 8040b89d903Svictorle 8050b89d903Svictorle PetscFunctionBegin; 8060b89d903Svictorle /* Get ij structure of P[rstart:rend,:]^T */ 8070e36024fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 8080e36024fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 8090e36024fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 8100e36024fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 8110e36024fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 8120e36024fSHong Zhang P_sub = psub[0]; 8130e36024fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 8140e36024fSHong Zhang ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 8150e36024fSHong Zhang ptJ=ptj; 8160e36024fSHong Zhang 8170e36024fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 8180e36024fSHong Zhang /* free space for accumulating nonzero column info */ 81955a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 8200e36024fSHong Zhang ci[0] = 0; 8210e36024fSHong Zhang 82255a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 82355a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 8240e36024fSHong Zhang ptasparserow = ptadenserow + an; 82555a3bba9SHong Zhang 82655a3bba9SHong Zhang /* create and initialize a linked list */ 82755a3bba9SHong Zhang nlnk = pn+1; 82855a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 8290e36024fSHong Zhang 8300e36024fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 8310e36024fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 83255a3bba9SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*ai[am]/pm)*pn,&free_space); 8330e36024fSHong Zhang current_space = free_space; 8340e36024fSHong Zhang 83522e3da61SHong Zhang int rank; 83622e3da61SHong Zhang ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 83722e3da61SHong Zhang /* printf(" [%d] Symbolic_SeqAIJ_SeqAIJ_ReducedPt, an: %d, am: %d, nnz: %d\n",rank,an,am,ai[am]);*/ 83822e3da61SHong Zhang 8390e36024fSHong Zhang /* Determine symbolic info for each row of C: */ 8400e36024fSHong Zhang for (i=0;i<pn;i++) { 8410e36024fSHong Zhang ptnzi = pti[i+1] - pti[i]; 8420e36024fSHong Zhang ptanzi = 0; 8430e36024fSHong Zhang /* Determine symbolic row of PtA_reduced: */ 8440e36024fSHong Zhang for (j=0;j<ptnzi;j++) { 8450e36024fSHong Zhang arow = *ptJ++; 8460e36024fSHong Zhang anzj = ai[arow+1] - ai[arow]; 8470e36024fSHong Zhang ajj = aj + ai[arow]; 8480e36024fSHong Zhang for (k=0;k<anzj;k++) { 8490e36024fSHong Zhang if (!ptadenserow[ajj[k]]) { 8500e36024fSHong Zhang ptadenserow[ajj[k]] = -1; 8510e36024fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 8520e36024fSHong Zhang } 8530e36024fSHong Zhang } 8540e36024fSHong Zhang } 8550e36024fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 8560e36024fSHong Zhang ptaj = ptasparserow; 8570e36024fSHong Zhang cnzi = 0; 8580e36024fSHong Zhang for (j=0;j<ptanzi;j++) { 8590e36024fSHong Zhang prow = *ptaj++; 8600e36024fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 8610e36024fSHong Zhang pjj = pj + pi[prow]; 86255a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 86355a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 86455a3bba9SHong Zhang cnzi += nlnk; 8650e36024fSHong Zhang } 8660e36024fSHong Zhang 8670e36024fSHong Zhang /* If free space is not available, make more free space */ 8680e36024fSHong Zhang /* Double the amount of total space in the list */ 8690e36024fSHong Zhang if (current_space->local_remaining<cnzi) { 8700e36024fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 8710e36024fSHong Zhang } 8720e36024fSHong Zhang 8730e36024fSHong Zhang /* Copy data into free space, and zero out denserows */ 87455a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 8750e36024fSHong Zhang current_space->array += cnzi; 8760e36024fSHong Zhang current_space->local_used += cnzi; 8770e36024fSHong Zhang current_space->local_remaining -= cnzi; 8780e36024fSHong Zhang 8790e36024fSHong Zhang for (j=0;j<ptanzi;j++) { 8800e36024fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 8810e36024fSHong Zhang } 88255a3bba9SHong Zhang 8830e36024fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 8840e36024fSHong Zhang /* For now, we will recompute what is needed. */ 8850e36024fSHong Zhang ci[i+1] = ci[i] + cnzi; 8860e36024fSHong Zhang } 8870e36024fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 8880e36024fSHong Zhang /* Allocate space for cj, initialize cj, and */ 8890e36024fSHong Zhang /* destroy list of free space and other temporary array(s) */ 89055a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 8910e36024fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 8920e36024fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 89355a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 8940e36024fSHong Zhang 8950e36024fSHong Zhang /* Allocate space for ca */ 8960e36024fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 8970e36024fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 8980e36024fSHong Zhang 8990e36024fSHong Zhang /* put together the new matrix */ 9000e36024fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 9010e36024fSHong Zhang 9020e36024fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 9030e36024fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 9040e36024fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 9050e36024fSHong Zhang c->freedata = PETSC_TRUE; 9060e36024fSHong Zhang c->nonew = 0; 9070e36024fSHong Zhang 9080e36024fSHong Zhang /* Clean up. */ 9090e36024fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 9100e36024fSHong Zhang 91122e3da61SHong Zhang if (rank==1){ 91222e3da61SHong Zhang ierr = MatView((*C), PETSC_VIEWER_STDOUT_SELF); 91322e3da61SHong Zhang } 9140e36024fSHong Zhang PetscFunctionReturn(0); 9150e36024fSHong Zhang } 9160e36024fSHong Zhang 9170e36024fSHong Zhang #undef __FUNCT__ 9180e36024fSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 91955a3bba9SHong Zhang PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 9200e36024fSHong Zhang { 9210e36024fSHong Zhang PetscErrorCode ierr; 922b1d57f15SBarry Smith 9230e36024fSHong Zhang PetscFunctionBegin; 9240e36024fSHong Zhang if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 9250e36024fSHong Zhang if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 9260e36024fSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9270e36024fSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 9280e36024fSHong Zhang } 9290e36024fSHong Zhang 9300e36024fSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 9310e36024fSHong Zhang 9320e36024fSHong Zhang PetscFunctionReturn(0); 9330e36024fSHong Zhang } 9340e36024fSHong Zhang 93522e3da61SHong Zhang /* ----------- new ------------------ */ 93622e3da61SHong Zhang #undef __FUNCT__ 93722e3da61SHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt" 93822e3da61SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 93922e3da61SHong Zhang { 94022e3da61SHong Zhang PetscErrorCode ierr; 94122e3da61SHong Zhang PetscInt flops=0; 94222e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 94322e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 94422e3da61SHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 94522e3da61SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 94622e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 94722e3da61SHong Zhang PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 94822e3da61SHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 94922e3da61SHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 95022e3da61SHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 95122e3da61SHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 95222e3da61SHong Zhang 95322e3da61SHong Zhang PetscFunctionBegin; 95422e3da61SHong Zhang pA=p->a+pi[prstart]; 95522e3da61SHong Zhang /* Allocate temporary array for storage of one row of A*P */ 95622e3da61SHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 95722e3da61SHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 95822e3da61SHong Zhang 95922e3da61SHong Zhang apj = (PetscInt *)(apa + cn); 96022e3da61SHong Zhang apjdense = apj + cn; 96122e3da61SHong Zhang 96222e3da61SHong Zhang /* Clear old values in C */ 96322e3da61SHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 96422e3da61SHong Zhang 96522e3da61SHong Zhang for (i=0;i<am;i++) { 96622e3da61SHong Zhang /* Form sparse row of A*P */ 96722e3da61SHong Zhang /* diagonal portion of A */ 96822e3da61SHong Zhang anzi = adi[i+1] - adi[i]; 96922e3da61SHong Zhang apnzj = 0; 97022e3da61SHong Zhang for (j=0;j<anzi;j++) { 97122e3da61SHong Zhang prow = *adj + prstart; 97222e3da61SHong Zhang adj++; 97322e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 97422e3da61SHong Zhang pjj = pj + pi[prow]; 97522e3da61SHong Zhang paj = pa + pi[prow]; 97622e3da61SHong Zhang for (k=0;k<pnzj;k++) { 97722e3da61SHong Zhang if (!apjdense[pjj[k]]) { 97822e3da61SHong Zhang apjdense[pjj[k]] = -1; 97922e3da61SHong Zhang apj[apnzj++] = pjj[k]; 98022e3da61SHong Zhang } 98122e3da61SHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 98222e3da61SHong Zhang } 98322e3da61SHong Zhang flops += 2*pnzj; 98422e3da61SHong Zhang ada++; 98522e3da61SHong Zhang } 98622e3da61SHong Zhang /* off-diagonal portion of A */ 98722e3da61SHong Zhang anzi = aoi[i+1] - aoi[i]; 98822e3da61SHong Zhang for (j=0;j<anzi;j++) { 98922e3da61SHong Zhang col = a->garray[*aoj]; 99022e3da61SHong Zhang if (col < cstart){ 99122e3da61SHong Zhang prow = *aoj; 99222e3da61SHong Zhang } else if (col >= cend){ 99322e3da61SHong Zhang prow = *aoj + prend-prstart; 99422e3da61SHong Zhang } else { 99522e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 99622e3da61SHong Zhang } 99722e3da61SHong Zhang aoj++; 99822e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 99922e3da61SHong Zhang pjj = pj + pi[prow]; 100022e3da61SHong Zhang paj = pa + pi[prow]; 100122e3da61SHong Zhang for (k=0;k<pnzj;k++) { 100222e3da61SHong Zhang if (!apjdense[pjj[k]]) { 100322e3da61SHong Zhang apjdense[pjj[k]] = -1; 100422e3da61SHong Zhang apj[apnzj++] = pjj[k]; 100522e3da61SHong Zhang } 100622e3da61SHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 100722e3da61SHong Zhang } 100822e3da61SHong Zhang flops += 2*pnzj; 100922e3da61SHong Zhang aoa++; 101022e3da61SHong Zhang } 101122e3da61SHong Zhang 101222e3da61SHong Zhang /* Sort the j index array for quick sparse axpy. */ 101322e3da61SHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 101422e3da61SHong Zhang 101522e3da61SHong Zhang /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 101622e3da61SHong Zhang pnzi = pi[i+1+prstart] - pi[i+prstart]; 101722e3da61SHong Zhang for (j=0;j<pnzi;j++) { 101822e3da61SHong Zhang nextap = 0; 101922e3da61SHong Zhang crow = *pJ++; 102022e3da61SHong Zhang cjj = cj + ci[crow]; 102122e3da61SHong Zhang caj = ca + ci[crow]; 102222e3da61SHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 102322e3da61SHong Zhang for (k=0;nextap<apnzj;k++) { 102422e3da61SHong Zhang if (cjj[k]==apj[nextap]) { 102522e3da61SHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 102622e3da61SHong Zhang } 102722e3da61SHong Zhang } 102822e3da61SHong Zhang flops += 2*apnzj; 102922e3da61SHong Zhang pA++; 103022e3da61SHong Zhang } 103122e3da61SHong Zhang 103222e3da61SHong Zhang /* Zero the current row info for A*P */ 103322e3da61SHong Zhang for (j=0;j<apnzj;j++) { 103422e3da61SHong Zhang apa[apj[j]] = 0.; 103522e3da61SHong Zhang apjdense[apj[j]] = 0; 103622e3da61SHong Zhang } 103722e3da61SHong Zhang } 103822e3da61SHong Zhang 103922e3da61SHong Zhang /* Assemble the final matrix and clean up */ 104022e3da61SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 104122e3da61SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 104222e3da61SHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 104322e3da61SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 104422e3da61SHong Zhang 104522e3da61SHong Zhang PetscFunctionReturn(0); 104622e3da61SHong Zhang } 1047*e462e02cSHong Zhang static PetscEvent logkey_getsymtransreduced = 0; 1048*e462e02cSHong Zhang #undef __FUNCT__ 1049*e462e02cSHong Zhang #define __FUNCT__ "MatGetSymbolicTransposeReduced_SeqIJ" 1050*e462e02cSHong Zhang PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[]) 1051*e462e02cSHong Zhang { 1052*e462e02cSHong Zhang PetscErrorCode ierr; 1053*e462e02cSHong Zhang PetscInt i,j,anzj; 1054*e462e02cSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ *)A->data; 1055*e462e02cSHong Zhang PetscInt an=A->N,am=A->M; 1056*e462e02cSHong Zhang PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j; 1057*e462e02cSHong Zhang 1058*e462e02cSHong Zhang PetscFunctionBegin; 1059*e462e02cSHong Zhang 1060*e462e02cSHong Zhang ierr = PetscLogInfo(A,"Getting Symbolic Transpose.\n");CHKERRQ(ierr); 1061*e462e02cSHong Zhang 1062*e462e02cSHong Zhang /* Set up timers */ 1063*e462e02cSHong Zhang if (!logkey_getsymtransreduced) { 1064*e462e02cSHong Zhang ierr = PetscLogEventRegister(&logkey_getsymtransreduced,"MatGetSymbolicTransposeReduced",MAT_COOKIE);CHKERRQ(ierr); 1065*e462e02cSHong Zhang } 1066*e462e02cSHong Zhang ierr = PetscLogEventBegin(logkey_getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 1067*e462e02cSHong Zhang 1068*e462e02cSHong Zhang /* Allocate space for symbolic transpose info and work array */ 1069*e462e02cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&ati);CHKERRQ(ierr); 1070*e462e02cSHong Zhang anzj = ai[rend] - ai[rstart]; 1071*e462e02cSHong Zhang ierr = PetscMalloc((anzj+1)*sizeof(PetscInt),&atj);CHKERRQ(ierr); 1072*e462e02cSHong Zhang ierr = PetscMalloc((an+1)*sizeof(PetscInt),&atfill);CHKERRQ(ierr); 1073*e462e02cSHong Zhang ierr = PetscMemzero(ati,(an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1074*e462e02cSHong Zhang 1075*e462e02cSHong Zhang /* Walk through aj and count ## of non-zeros in each row of A^T. */ 1076*e462e02cSHong Zhang /* Note: offset by 1 for fast conversion into csr format. */ 1077*e462e02cSHong Zhang for (i=ai[rstart]; i<ai[rend]; i++) { 1078*e462e02cSHong Zhang ati[aj[i]+1] += 1; 1079*e462e02cSHong Zhang } 1080*e462e02cSHong Zhang /* Form ati for csr format of A^T. */ 1081*e462e02cSHong Zhang for (i=0;i<an;i++) { 1082*e462e02cSHong Zhang ati[i+1] += ati[i]; 1083*e462e02cSHong Zhang } 1084*e462e02cSHong Zhang 1085*e462e02cSHong Zhang /* Copy ati into atfill so we have locations of the next free space in atj */ 1086*e462e02cSHong Zhang ierr = PetscMemcpy(atfill,ati,an*sizeof(PetscInt));CHKERRQ(ierr); 1087*e462e02cSHong Zhang 1088*e462e02cSHong Zhang /* Walk through A row-wise and mark nonzero entries of A^T. */ 1089*e462e02cSHong Zhang aj = aj + ai[rstart]; 1090*e462e02cSHong Zhang for (i=rstart; i<rend; i++) { 1091*e462e02cSHong Zhang anzj = ai[i+1] - ai[i]; 1092*e462e02cSHong Zhang for (j=0;j<anzj;j++) { 1093*e462e02cSHong Zhang atj[atfill[*aj]] = i-rstart; 1094*e462e02cSHong Zhang atfill[*aj++] += 1; 1095*e462e02cSHong Zhang } 1096*e462e02cSHong Zhang } 1097*e462e02cSHong Zhang 1098*e462e02cSHong Zhang /* Clean up temporary space and complete requests. */ 1099*e462e02cSHong Zhang ierr = PetscFree(atfill);CHKERRQ(ierr); 1100*e462e02cSHong Zhang *Ati = ati; 1101*e462e02cSHong Zhang *Atj = atj; 1102*e462e02cSHong Zhang 1103*e462e02cSHong Zhang ierr = PetscLogEventEnd(logkey_getsymtransreduced,A,0,0,0);CHKERRQ(ierr); 1104*e462e02cSHong Zhang PetscFunctionReturn(0); 1105*e462e02cSHong Zhang } 110622e3da61SHong Zhang 110722e3da61SHong Zhang #undef __FUNCT__ 110822e3da61SHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt" 110922e3da61SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 111022e3da61SHong Zhang { 111122e3da61SHong Zhang PetscErrorCode ierr; 111222e3da61SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 111322e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 111422e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 111522e3da61SHong Zhang Mat_SeqAIJ *p=(Mat_SeqAIJ*)P->data,*c; 111622e3da61SHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj; 111722e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 111822e3da61SHong Zhang PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 111922e3da61SHong Zhang PetscInt an=A->N,am=A->m,pn=P->N,pm=P->M; 112022e3da61SHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 112122e3da61SHong Zhang PetscInt m=prend-prstart,nlnk,*lnk; 112222e3da61SHong Zhang MatScalar *ca; 112322e3da61SHong Zhang PetscBT lnkbt; 112422e3da61SHong Zhang 112522e3da61SHong Zhang PetscFunctionBegin; 112622e3da61SHong Zhang 112722e3da61SHong Zhang /* Get ij structure of P[rstart:rend,:]^T */ 1128*e462e02cSHong Zhang ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr); 112922e3da61SHong Zhang ptJ=ptj; 113022e3da61SHong Zhang 113122e3da61SHong Zhang /* Allocate ci array, arrays for fill computation and */ 113222e3da61SHong Zhang /* free space for accumulating nonzero column info */ 113322e3da61SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 113422e3da61SHong Zhang ci[0] = 0; 113522e3da61SHong Zhang 113622e3da61SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 113722e3da61SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 113822e3da61SHong Zhang ptasparserow = ptadenserow + an; 113922e3da61SHong Zhang 114022e3da61SHong Zhang /* create and initialize a linked list */ 114122e3da61SHong Zhang nlnk = pn+1; 114222e3da61SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 114322e3da61SHong Zhang 114422e3da61SHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 114522e3da61SHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 114622e3da61SHong Zhang nnz = adi[am] + aoi[am]; 114722e3da61SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space); 114822e3da61SHong Zhang current_space = free_space; 114922e3da61SHong Zhang 115022e3da61SHong Zhang /* Determine symbolic info for each row of C: */ 115122e3da61SHong Zhang for (i=0;i<pn;i++) { 115222e3da61SHong Zhang ptnzi = pti[i+1] - pti[i]; 115322e3da61SHong Zhang ptanzi = 0; 115422e3da61SHong Zhang /* Determine symbolic row of PtA_reduced: */ 115522e3da61SHong Zhang for (j=0;j<ptnzi;j++) { 115622e3da61SHong Zhang arow = *ptJ++; 115722e3da61SHong Zhang /* diagonal portion of A */ 115822e3da61SHong Zhang anzj = adi[arow+1] - adi[arow]; 115922e3da61SHong Zhang ajj = adj + adi[arow]; 116022e3da61SHong Zhang for (k=0;k<anzj;k++) { 116122e3da61SHong Zhang col = ajj[k]+prstart; 116222e3da61SHong Zhang if (!ptadenserow[col]) { 116322e3da61SHong Zhang ptadenserow[col] = -1; 116422e3da61SHong Zhang ptasparserow[ptanzi++] = col; 116522e3da61SHong Zhang } 116622e3da61SHong Zhang } 116722e3da61SHong Zhang /* off-diagonal portion of A */ 116822e3da61SHong Zhang anzj = aoi[arow+1] - aoi[arow]; 116922e3da61SHong Zhang ajj = aoj + aoi[arow]; 117022e3da61SHong Zhang for (k=0;k<anzj;k++) { 117122e3da61SHong Zhang col = a->garray[ajj[k]]; /* global col */ 117222e3da61SHong Zhang if (col < cstart){ 117322e3da61SHong Zhang col = ajj[k]; 117422e3da61SHong Zhang } else if (col >= cend){ 117522e3da61SHong Zhang col = ajj[k] + m; 117622e3da61SHong Zhang } else { 117722e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 117822e3da61SHong Zhang } 117922e3da61SHong Zhang if (!ptadenserow[col]) { 118022e3da61SHong Zhang ptadenserow[col] = -1; 118122e3da61SHong Zhang ptasparserow[ptanzi++] = col; 118222e3da61SHong Zhang } 118322e3da61SHong Zhang } 118422e3da61SHong Zhang } 118522e3da61SHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 118622e3da61SHong Zhang ptaj = ptasparserow; 118722e3da61SHong Zhang cnzi = 0; 118822e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 118922e3da61SHong Zhang prow = *ptaj++; 119022e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 119122e3da61SHong Zhang pjj = pj + pi[prow]; 119222e3da61SHong Zhang /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */ 119322e3da61SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 119422e3da61SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 119522e3da61SHong Zhang cnzi += nlnk; 119622e3da61SHong Zhang } 119722e3da61SHong Zhang 119822e3da61SHong Zhang /* If free space is not available, make more free space */ 119922e3da61SHong Zhang /* Double the amount of total space in the list */ 120022e3da61SHong Zhang if (current_space->local_remaining<cnzi) { 120122e3da61SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 120222e3da61SHong Zhang } 120322e3da61SHong Zhang 120422e3da61SHong Zhang /* Copy data into free space, and zero out denserows */ 120522e3da61SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 120622e3da61SHong Zhang current_space->array += cnzi; 120722e3da61SHong Zhang current_space->local_used += cnzi; 120822e3da61SHong Zhang current_space->local_remaining -= cnzi; 120922e3da61SHong Zhang 121022e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 121122e3da61SHong Zhang ptadenserow[ptasparserow[j]] = 0; 121222e3da61SHong Zhang } 121322e3da61SHong Zhang 121422e3da61SHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 121522e3da61SHong Zhang /* For now, we will recompute what is needed. */ 121622e3da61SHong Zhang ci[i+1] = ci[i] + cnzi; 121722e3da61SHong Zhang } 121822e3da61SHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 121922e3da61SHong Zhang /* Allocate space for cj, initialize cj, and */ 122022e3da61SHong Zhang /* destroy list of free space and other temporary array(s) */ 122122e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 122222e3da61SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 122322e3da61SHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 122422e3da61SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 122522e3da61SHong Zhang 122622e3da61SHong Zhang /* Allocate space for ca */ 122722e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 122822e3da61SHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 122922e3da61SHong Zhang 123022e3da61SHong Zhang /* put together the new matrix */ 123122e3da61SHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 123222e3da61SHong Zhang 123322e3da61SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 123422e3da61SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 123522e3da61SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 123622e3da61SHong Zhang c->freedata = PETSC_TRUE; 123722e3da61SHong Zhang c->nonew = 0; 123822e3da61SHong Zhang 123922e3da61SHong Zhang /* Clean up. */ 124022e3da61SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 124122e3da61SHong Zhang 124222e3da61SHong Zhang PetscFunctionReturn(0); 124322e3da61SHong Zhang } 124422e3da61SHong Zhang 1245*e462e02cSHong Zhang static PetscEvent logkey_PtAPReducedPt = 0; 124622e3da61SHong Zhang #undef __FUNCT__ 124722e3da61SHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_SeqAIJ_ReducedPt" 124822e3da61SHong Zhang PetscErrorCode MatPtAP_MPIAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 124922e3da61SHong Zhang { 125022e3da61SHong Zhang PetscErrorCode ierr; 125122e3da61SHong Zhang 125222e3da61SHong Zhang PetscFunctionBegin; 125322e3da61SHong Zhang if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 125422e3da61SHong Zhang if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 1255*e462e02cSHong Zhang if (!logkey_PtAPReducedPt) { 1256*e462e02cSHong Zhang ierr = PetscLogEventRegister(&logkey_PtAPReducedPt,"MatPtAP_ReducedPt",MAT_COOKIE); 1257*e462e02cSHong Zhang } 1258*e462e02cSHong Zhang PetscLogEventBegin(logkey_PtAPReducedPt,A,P,0,0);CHKERRQ(ierr); 125922e3da61SHong Zhang 126022e3da61SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 126122e3da61SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 126222e3da61SHong Zhang } 126322e3da61SHong Zhang ierr = MatPtAPNumeric_MPIAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 1264*e462e02cSHong Zhang ierr = PetscLogEventEnd(logkey_PtAPReducedPt,A,P,0,0);CHKERRQ(ierr); 126522e3da61SHong Zhang 126622e3da61SHong Zhang PetscFunctionReturn(0); 126722e3da61SHong Zhang } 126822e3da61SHong Zhang 1269