1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 740390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 750390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 760390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat); 7722e3da61SHong Zhang 78*e240928fSHong Zhang 79*e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 80*e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 81*e240928fSHong Zhang 82eb9c0419SKris Buschelman #undef __FUNCT__ 83ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 84ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 85ff134f7aSHong Zhang { 86ff134f7aSHong Zhang PetscErrorCode ierr; 87b90dcfe3SHong Zhang 88b90dcfe3SHong Zhang PetscFunctionBegin; 89b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 904c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 91b90dcfe3SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 924c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 93*e240928fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */ 944c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 95*e240928fSHong Zhang ierr = MatDestroy(*C);CHKERRQ(ierr); 96*e240928fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); 97*e240928fSHong Zhang 98*e240928fSHong Zhang /* 99b90dcfe3SHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 100*e240928fSHong Zhang */ 1014c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 102b90dcfe3SHong Zhang } else { 103b90dcfe3SHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 104b90dcfe3SHong Zhang } 105b90dcfe3SHong Zhang PetscFunctionReturn(0); 106b90dcfe3SHong Zhang } 107*e240928fSHong Zhang #define NEW 108b90dcfe3SHong Zhang #undef __FUNCT__ 109*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 110b90dcfe3SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 111b90dcfe3SHong Zhang { 112b90dcfe3SHong Zhang PetscErrorCode ierr; 1130390132cSHong Zhang Mat P_seq,C_seq; 114*e240928fSHong Zhang Mat P_loc,P_oth,*ploc; 115*e240928fSHong Zhang PetscInt prstart,prend,m=P->m,low; 116*e240928fSHong Zhang IS isrow,iscol; 117ff134f7aSHong Zhang 118ff134f7aSHong Zhang PetscFunctionBegin; 119*e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 120*e240928fSHong Zhang ierr = MatGetBrowsOfAoCols_Private(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr); 121*e240928fSHong Zhang prend = prstart + m; 122*e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 123*e240928fSHong Zhang ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 124*e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 125*e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 126*e240928fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr); 127*e240928fSHong Zhang P_loc = ploc[0]; 128*e240928fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 129*e240928fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 130*e240928fSHong Zhang 131*e240928fSHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 132*e240928fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr); 133*e240928fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr); 134*e240928fSHong Zhang 135*e240928fSHong Zhang ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr); 136*e240928fSHong Zhang ierr = MatDestroy(P_oth);CHKERRQ(ierr); 137*e240928fSHong Zhang #ifdef OLD 13825616d81SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 13922e3da61SHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 14025616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 141ff134f7aSHong Zhang prend = prstart + m; 1420390132cSHong Zhang ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 14325616d81SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 144*e240928fSHong Zhang #endif 145b90dcfe3SHong Zhang /* add C_seq into mpi C */ 146b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 147b90dcfe3SHong Zhang 148ff134f7aSHong Zhang PetscFunctionReturn(0); 149ff134f7aSHong Zhang } 150ff134f7aSHong Zhang 151ff134f7aSHong Zhang #undef __FUNCT__ 152*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 153b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 154ff134f7aSHong Zhang { 155b90dcfe3SHong Zhang PetscErrorCode ierr; 1560390132cSHong Zhang Mat P_seq,C_seq; 157b1d57f15SBarry Smith PetscInt prstart,prend,m=P->m; 158671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 159671beff6SHong Zhang PetscObjectContainer container; 160ff134f7aSHong Zhang 161ff134f7aSHong Zhang PetscFunctionBegin; 162671beff6SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 163671beff6SHong Zhang if (container) { 1647f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 1657f79fd58SMatthew Knepley } else { 1667f79fd58SMatthew Knepley SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 167671beff6SHong Zhang } 168671beff6SHong Zhang 169b90dcfe3SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 1700390132cSHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 171ff134f7aSHong Zhang 172b90dcfe3SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 173b90dcfe3SHong Zhang prend = prstart + m; 174b90dcfe3SHong Zhang C_seq = merge->C_seq; 1750390132cSHong Zhang ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 176b90dcfe3SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 177b90dcfe3SHong Zhang 178b90dcfe3SHong Zhang /* add C_seq into mpi C */ 179b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 180b90dcfe3SHong Zhang 181ff134f7aSHong Zhang PetscFunctionReturn(0); 182ff134f7aSHong Zhang } 183ff134f7aSHong Zhang 184ff134f7aSHong Zhang #undef __FUNCT__ 1859af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 186dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1879af31e4aSHong Zhang { 188dfbe8321SBarry Smith PetscErrorCode ierr; 189b1d57f15SBarry Smith 1909af31e4aSHong Zhang PetscFunctionBegin; 1919af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 192d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1939af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 194d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1959af31e4aSHong Zhang } 196d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1979af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 198d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1999af31e4aSHong Zhang PetscFunctionReturn(0); 2009af31e4aSHong Zhang } 2019af31e4aSHong Zhang 2026849ba73SBarry Smith /* 2039af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2044d3841fdSKris Buschelman 2054d3841fdSKris Buschelman Collective on Mat 2064d3841fdSKris Buschelman 2074d3841fdSKris Buschelman Input Parameters: 2084d3841fdSKris Buschelman + A - the matrix 2094d3841fdSKris Buschelman - P - the projection matrix 2104d3841fdSKris Buschelman 2114d3841fdSKris Buschelman Output Parameters: 2124d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2134d3841fdSKris Buschelman 2144d3841fdSKris Buschelman Notes: 2154d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2164d3841fdSKris Buschelman 2174d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2184d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2199af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2204d3841fdSKris Buschelman 2214d3841fdSKris Buschelman Level: intermediate 2224d3841fdSKris Buschelman 2239af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2246849ba73SBarry Smith */ 225*e240928fSHong Zhang #undef __FUNCT__ 226*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 22755a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 22855a3bba9SHong Zhang { 229dfbe8321SBarry Smith PetscErrorCode ierr; 230534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 231534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 232eb9c0419SKris Buschelman 233eb9c0419SKris Buschelman PetscFunctionBegin; 234eb9c0419SKris Buschelman 2354482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 236c9780b6fSBarry Smith PetscValidType(A,1); 237eb9c0419SKris Buschelman MatPreallocated(A); 238eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 239eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 240eb9c0419SKris Buschelman 2414482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 242c9780b6fSBarry Smith PetscValidType(P,2); 243eb9c0419SKris Buschelman MatPreallocated(P); 244eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 245eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 246eb9c0419SKris Buschelman 2474482741eSBarry Smith PetscValidPointer(C,3); 2484482741eSBarry Smith 249eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 250eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 251eb9c0419SKris Buschelman 252534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 253534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 254534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 255534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 256534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 257534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 258534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2594d3841fdSKris Buschelman 260534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 261534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 262534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 263eb9c0419SKris Buschelman 264eb9c0419SKris Buschelman PetscFunctionReturn(0); 265eb9c0419SKris Buschelman } 266eb9c0419SKris Buschelman 267f747e1aeSHong Zhang typedef struct { 268f747e1aeSHong Zhang Mat symAP; 269f747e1aeSHong Zhang } Mat_PtAPstruct; 270f747e1aeSHong Zhang 27178a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 27278a80504SBarry Smith 273f747e1aeSHong Zhang #undef __FUNCT__ 274f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 275f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 276f747e1aeSHong Zhang { 277f4a850bbSBarry Smith PetscErrorCode ierr; 278f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 279f747e1aeSHong Zhang 280f747e1aeSHong Zhang PetscFunctionBegin; 281f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 282f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 28378a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 284f747e1aeSHong Zhang PetscFunctionReturn(0); 285f747e1aeSHong Zhang } 286f747e1aeSHong Zhang 287eb9c0419SKris Buschelman #undef __FUNCT__ 2889af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 28955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 29055a3bba9SHong Zhang { 291dfbe8321SBarry Smith PetscErrorCode ierr; 292d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 293d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 29455a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 295b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 29655a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 297b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 298d20bfe6fSHong Zhang MatScalar *ca; 29955a3bba9SHong Zhang PetscBT lnkbt; 300eb9c0419SKris Buschelman 301eb9c0419SKris Buschelman PetscFunctionBegin; 302d20bfe6fSHong Zhang /* Get ij structure of P^T */ 303eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 304d20bfe6fSHong Zhang ptJ=ptj; 305eb9c0419SKris Buschelman 306d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 307d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 30855a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 309d20bfe6fSHong Zhang ci[0] = 0; 310eb9c0419SKris Buschelman 31155a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 31255a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 313d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 31455a3bba9SHong Zhang 31555a3bba9SHong Zhang /* create and initialize a linked list */ 31655a3bba9SHong Zhang nlnk = pn+1; 31755a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 318eb9c0419SKris Buschelman 319d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 320d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 321d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 322d20bfe6fSHong Zhang current_space = free_space; 323d20bfe6fSHong Zhang 324d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 325d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 326d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 327d20bfe6fSHong Zhang ptanzi = 0; 328d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 329d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 330d20bfe6fSHong Zhang arow = *ptJ++; 331d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 332d20bfe6fSHong Zhang ajj = aj + ai[arow]; 333d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 334d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 335d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 336d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 337d20bfe6fSHong Zhang } 338d20bfe6fSHong Zhang } 339d20bfe6fSHong Zhang } 340d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 341d20bfe6fSHong Zhang ptaj = ptasparserow; 342d20bfe6fSHong Zhang cnzi = 0; 343d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 344d20bfe6fSHong Zhang prow = *ptaj++; 345d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 346d20bfe6fSHong Zhang pjj = pj + pi[prow]; 34755a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 34855a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 34955a3bba9SHong Zhang cnzi += nlnk; 350d20bfe6fSHong Zhang } 351d20bfe6fSHong Zhang 352d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 353d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 354d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 355d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 356d20bfe6fSHong Zhang } 357d20bfe6fSHong Zhang 358d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 35955a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 360d20bfe6fSHong Zhang current_space->array += cnzi; 361d20bfe6fSHong Zhang current_space->local_used += cnzi; 362d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 363d20bfe6fSHong Zhang 364d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 365d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 366d20bfe6fSHong Zhang } 367d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 368d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 369d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 370d20bfe6fSHong Zhang } 371d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 372d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 373d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 37455a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 375d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 376d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 37755a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 378d20bfe6fSHong Zhang 379d20bfe6fSHong Zhang /* Allocate space for ca */ 380d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 381d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 382d20bfe6fSHong Zhang 383d20bfe6fSHong Zhang /* put together the new matrix */ 384d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 385d20bfe6fSHong Zhang 386d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 387d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 388d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 389d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 390d20bfe6fSHong Zhang c->nonew = 0; 391d20bfe6fSHong Zhang 392d20bfe6fSHong Zhang /* Clean up. */ 393d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 394eb9c0419SKris Buschelman 395eb9c0419SKris Buschelman PetscFunctionReturn(0); 396eb9c0419SKris Buschelman } 397eb9c0419SKris Buschelman 3983985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 3993985e5eaSKris Buschelman EXTERN_C_BEGIN 4003985e5eaSKris Buschelman #undef __FUNCT__ 4019af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 40255a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 40355a3bba9SHong Zhang { 4045c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 405dfbe8321SBarry Smith PetscErrorCode ierr; 4063985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4073985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4083985e5eaSKris Buschelman Mat P=pp->AIJ; 4093985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 410b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 411b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 412b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 413b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4143985e5eaSKris Buschelman MatScalar *ca; 4153985e5eaSKris Buschelman 4163985e5eaSKris Buschelman PetscFunctionBegin; 4173985e5eaSKris Buschelman /* Start timer */ 4189af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4193985e5eaSKris Buschelman 4203985e5eaSKris Buschelman /* Get ij structure of P^T */ 4213985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4223985e5eaSKris Buschelman 4233985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4243985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 425b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4263985e5eaSKris Buschelman ci[0] = 0; 4273985e5eaSKris Buschelman 428b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 429b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4303985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4313985e5eaSKris Buschelman denserow = ptasparserow + an; 4323985e5eaSKris Buschelman sparserow = denserow + pn; 4333985e5eaSKris Buschelman 4343985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4353985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4363985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4373985e5eaSKris Buschelman current_space = free_space; 4383985e5eaSKris Buschelman 4393985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4403985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4413985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4423985e5eaSKris Buschelman ptanzi = 0; 4433985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4443985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4453985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4463985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4473985e5eaSKris Buschelman arow = ptJ[j] + dof; 4483985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4493985e5eaSKris Buschelman ajj = aj + ai[arow]; 4503985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4513985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4523985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4533985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4543985e5eaSKris Buschelman } 4553985e5eaSKris Buschelman } 4563985e5eaSKris Buschelman } 4573985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4583985e5eaSKris Buschelman ptaj = ptasparserow; 4593985e5eaSKris Buschelman cnzi = 0; 4603985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 461fe05a634SKris Buschelman pdof = *ptaj%dof; 4623985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4633985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4643985e5eaSKris Buschelman pjj = pj + pi[prow]; 4653985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 466fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 467fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 468fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4693985e5eaSKris Buschelman } 4703985e5eaSKris Buschelman } 4713985e5eaSKris Buschelman } 4723985e5eaSKris Buschelman 4733985e5eaSKris Buschelman /* sort sparserow */ 4743985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 4753985e5eaSKris Buschelman 4763985e5eaSKris Buschelman /* If free space is not available, make more free space */ 4773985e5eaSKris Buschelman /* Double the amount of total space in the list */ 4783985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 4793985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4803985e5eaSKris Buschelman } 4813985e5eaSKris Buschelman 4823985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 483b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 4843985e5eaSKris Buschelman current_space->array += cnzi; 4853985e5eaSKris Buschelman current_space->local_used += cnzi; 4863985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 4873985e5eaSKris Buschelman 4883985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 4893985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 4903985e5eaSKris Buschelman } 4913985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 4923985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 4933985e5eaSKris Buschelman } 4943985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 4953985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 4963985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 4973985e5eaSKris Buschelman } 4983985e5eaSKris Buschelman } 4993985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 5003985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 5013985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 502b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5033985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5043985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5053985e5eaSKris Buschelman 5063985e5eaSKris Buschelman /* Allocate space for ca */ 5073985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5083985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5093985e5eaSKris Buschelman 5103985e5eaSKris Buschelman /* put together the new matrix */ 5113985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5123985e5eaSKris Buschelman 5133985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5143985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5153985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5163985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5173985e5eaSKris Buschelman c->nonew = 0; 5183985e5eaSKris Buschelman 5193985e5eaSKris Buschelman /* Clean up. */ 5203985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5213985e5eaSKris Buschelman 5229af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5233985e5eaSKris Buschelman PetscFunctionReturn(0); 5243985e5eaSKris Buschelman } 5253985e5eaSKris Buschelman EXTERN_C_END 5263985e5eaSKris Buschelman 5276849ba73SBarry Smith /* 5289af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5294d3841fdSKris Buschelman 5304d3841fdSKris Buschelman Collective on Mat 5314d3841fdSKris Buschelman 5324d3841fdSKris Buschelman Input Parameters: 5334d3841fdSKris Buschelman + A - the matrix 5344d3841fdSKris Buschelman - P - the projection matrix 5354d3841fdSKris Buschelman 5364d3841fdSKris Buschelman Output Parameters: 5374d3841fdSKris Buschelman . C - the product matrix 5384d3841fdSKris Buschelman 5394d3841fdSKris Buschelman Notes: 5409af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5414d3841fdSKris Buschelman the user using MatDeatroy(). 5424d3841fdSKris Buschelman 543170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 544170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5454d3841fdSKris Buschelman 5464d3841fdSKris Buschelman Level: intermediate 5474d3841fdSKris Buschelman 5489af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5496849ba73SBarry Smith */ 550*e240928fSHong Zhang #undef __FUNCT__ 551*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 55255a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 55355a3bba9SHong Zhang { 554dfbe8321SBarry Smith PetscErrorCode ierr; 555534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 556534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 557eb9c0419SKris Buschelman 558eb9c0419SKris Buschelman PetscFunctionBegin; 559eb9c0419SKris Buschelman 5604482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 561c9780b6fSBarry Smith PetscValidType(A,1); 562eb9c0419SKris Buschelman MatPreallocated(A); 563eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 564eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 565eb9c0419SKris Buschelman 5664482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 567c9780b6fSBarry Smith PetscValidType(P,2); 568eb9c0419SKris Buschelman MatPreallocated(P); 569eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 570eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 571eb9c0419SKris Buschelman 5724482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 573c9780b6fSBarry Smith PetscValidType(C,3); 574eb9c0419SKris Buschelman MatPreallocated(C); 575eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 576eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 577eb9c0419SKris Buschelman 578eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 579eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 580eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 581eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 582eb9c0419SKris Buschelman 583534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 584534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 585534c1384SKris Buschelman fA = A->ops->ptapnumeric; 586534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 587534c1384SKris Buschelman fP = P->ops->ptapnumeric; 588534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 589534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5904d3841fdSKris Buschelman 591534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 592534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 593534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 594eb9c0419SKris Buschelman 595eb9c0419SKris Buschelman PetscFunctionReturn(0); 596eb9c0419SKris Buschelman } 597eb9c0419SKris Buschelman 598eb9c0419SKris Buschelman #undef __FUNCT__ 5999af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 600dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 601dfbe8321SBarry Smith { 602dfbe8321SBarry Smith PetscErrorCode ierr; 603b1d57f15SBarry Smith PetscInt flops=0; 604d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 605d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 606d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 607b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 608b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 609b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 610b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 611d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 612eb9c0419SKris Buschelman 613eb9c0419SKris Buschelman PetscFunctionBegin; 614d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 615b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 616b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 617eb9c0419SKris Buschelman 618b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 619d20bfe6fSHong Zhang apjdense = apj + cn; 620d20bfe6fSHong Zhang 621d20bfe6fSHong Zhang /* Clear old values in C */ 622d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 623d20bfe6fSHong Zhang 624d20bfe6fSHong Zhang for (i=0;i<am;i++) { 625d20bfe6fSHong Zhang /* Form sparse row of A*P */ 626d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 627d20bfe6fSHong Zhang apnzj = 0; 628d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 629d20bfe6fSHong Zhang prow = *aj++; 630d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 631d20bfe6fSHong Zhang pjj = pj + pi[prow]; 632d20bfe6fSHong Zhang paj = pa + pi[prow]; 633d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 634d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 635d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 636d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 637d20bfe6fSHong Zhang } 638d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 639d20bfe6fSHong Zhang } 640d20bfe6fSHong Zhang flops += 2*pnzj; 641d20bfe6fSHong Zhang aa++; 642d20bfe6fSHong Zhang } 643d20bfe6fSHong Zhang 644d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 645d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 646d20bfe6fSHong Zhang 647d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 648d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 649d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 650d20bfe6fSHong Zhang nextap = 0; 651d20bfe6fSHong Zhang crow = *pJ++; 652d20bfe6fSHong Zhang cjj = cj + ci[crow]; 653d20bfe6fSHong Zhang caj = ca + ci[crow]; 654d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 655d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 656d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 657d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 658d20bfe6fSHong Zhang } 659d20bfe6fSHong Zhang } 660d20bfe6fSHong Zhang flops += 2*apnzj; 661d20bfe6fSHong Zhang pA++; 662d20bfe6fSHong Zhang } 663d20bfe6fSHong Zhang 664d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 665d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 666d20bfe6fSHong Zhang apa[apj[j]] = 0.; 667d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 668d20bfe6fSHong Zhang } 669d20bfe6fSHong Zhang } 670d20bfe6fSHong Zhang 671d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 672d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 673d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 674d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 675d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 676d20bfe6fSHong Zhang 677eb9c0419SKris Buschelman PetscFunctionReturn(0); 678eb9c0419SKris Buschelman } 6790e36024fSHong Zhang 6800390132cSHong Zhang /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 6810e36024fSHong Zhang 6820e36024fSHong Zhang #undef __FUNCT__ 6830390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ" 6840390132cSHong Zhang PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 68522e3da61SHong Zhang { 68622e3da61SHong Zhang PetscErrorCode ierr; 68722e3da61SHong Zhang PetscInt flops=0; 68822e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 68922e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 69022e3da61SHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 69122e3da61SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 69222e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 69322e3da61SHong Zhang PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 69422e3da61SHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 69522e3da61SHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 69622e3da61SHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 69722e3da61SHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 69822e3da61SHong Zhang 69922e3da61SHong Zhang PetscFunctionBegin; 70022e3da61SHong Zhang pA=p->a+pi[prstart]; 70122e3da61SHong Zhang /* Allocate temporary array for storage of one row of A*P */ 70222e3da61SHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 70322e3da61SHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 70422e3da61SHong Zhang 70522e3da61SHong Zhang apj = (PetscInt *)(apa + cn); 70622e3da61SHong Zhang apjdense = apj + cn; 70722e3da61SHong Zhang 70822e3da61SHong Zhang /* Clear old values in C */ 70922e3da61SHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 71022e3da61SHong Zhang 71122e3da61SHong Zhang for (i=0;i<am;i++) { 71222e3da61SHong Zhang /* Form sparse row of A*P */ 71322e3da61SHong Zhang /* diagonal portion of A */ 71422e3da61SHong Zhang anzi = adi[i+1] - adi[i]; 71522e3da61SHong Zhang apnzj = 0; 71622e3da61SHong Zhang for (j=0;j<anzi;j++) { 71722e3da61SHong Zhang prow = *adj + prstart; 71822e3da61SHong Zhang adj++; 71922e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 72022e3da61SHong Zhang pjj = pj + pi[prow]; 72122e3da61SHong Zhang paj = pa + pi[prow]; 72222e3da61SHong Zhang for (k=0;k<pnzj;k++) { 72322e3da61SHong Zhang if (!apjdense[pjj[k]]) { 72422e3da61SHong Zhang apjdense[pjj[k]] = -1; 72522e3da61SHong Zhang apj[apnzj++] = pjj[k]; 72622e3da61SHong Zhang } 72722e3da61SHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 72822e3da61SHong Zhang } 72922e3da61SHong Zhang flops += 2*pnzj; 73022e3da61SHong Zhang ada++; 73122e3da61SHong Zhang } 73222e3da61SHong Zhang /* off-diagonal portion of A */ 73322e3da61SHong Zhang anzi = aoi[i+1] - aoi[i]; 73422e3da61SHong Zhang for (j=0;j<anzi;j++) { 73522e3da61SHong Zhang col = a->garray[*aoj]; 73622e3da61SHong Zhang if (col < cstart){ 73722e3da61SHong Zhang prow = *aoj; 73822e3da61SHong Zhang } else if (col >= cend){ 73922e3da61SHong Zhang prow = *aoj + prend-prstart; 74022e3da61SHong Zhang } else { 74122e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 74222e3da61SHong Zhang } 74322e3da61SHong Zhang aoj++; 74422e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 74522e3da61SHong Zhang pjj = pj + pi[prow]; 74622e3da61SHong Zhang paj = pa + pi[prow]; 74722e3da61SHong Zhang for (k=0;k<pnzj;k++) { 74822e3da61SHong Zhang if (!apjdense[pjj[k]]) { 74922e3da61SHong Zhang apjdense[pjj[k]] = -1; 75022e3da61SHong Zhang apj[apnzj++] = pjj[k]; 75122e3da61SHong Zhang } 75222e3da61SHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 75322e3da61SHong Zhang } 75422e3da61SHong Zhang flops += 2*pnzj; 75522e3da61SHong Zhang aoa++; 75622e3da61SHong Zhang } 75722e3da61SHong Zhang 75822e3da61SHong Zhang /* Sort the j index array for quick sparse axpy. */ 75922e3da61SHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 76022e3da61SHong Zhang 76122e3da61SHong Zhang /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 76222e3da61SHong Zhang pnzi = pi[i+1+prstart] - pi[i+prstart]; 76322e3da61SHong Zhang for (j=0;j<pnzi;j++) { 76422e3da61SHong Zhang nextap = 0; 76522e3da61SHong Zhang crow = *pJ++; 76622e3da61SHong Zhang cjj = cj + ci[crow]; 76722e3da61SHong Zhang caj = ca + ci[crow]; 76822e3da61SHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 76922e3da61SHong Zhang for (k=0;nextap<apnzj;k++) { 77022e3da61SHong Zhang if (cjj[k]==apj[nextap]) { 77122e3da61SHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 77222e3da61SHong Zhang } 77322e3da61SHong Zhang } 77422e3da61SHong Zhang flops += 2*apnzj; 77522e3da61SHong Zhang pA++; 77622e3da61SHong Zhang } 77722e3da61SHong Zhang 77822e3da61SHong Zhang /* Zero the current row info for A*P */ 77922e3da61SHong Zhang for (j=0;j<apnzj;j++) { 78022e3da61SHong Zhang apa[apj[j]] = 0.; 78122e3da61SHong Zhang apjdense[apj[j]] = 0; 78222e3da61SHong Zhang } 78322e3da61SHong Zhang } 78422e3da61SHong Zhang 78522e3da61SHong Zhang /* Assemble the final matrix and clean up */ 78622e3da61SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78722e3da61SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78822e3da61SHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 78922e3da61SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 79022e3da61SHong Zhang 79122e3da61SHong Zhang PetscFunctionReturn(0); 79222e3da61SHong Zhang } 79322e3da61SHong Zhang 79422e3da61SHong Zhang #undef __FUNCT__ 7950390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ" 7960390132cSHong Zhang PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 79722e3da61SHong Zhang { 79822e3da61SHong Zhang PetscErrorCode ierr; 79922e3da61SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 80022e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 80122e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 80222e3da61SHong Zhang Mat_SeqAIJ *p=(Mat_SeqAIJ*)P->data,*c; 80322e3da61SHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj; 80422e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 80522e3da61SHong Zhang PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 80622e3da61SHong Zhang PetscInt an=A->N,am=A->m,pn=P->N,pm=P->M; 80722e3da61SHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 80822e3da61SHong Zhang PetscInt m=prend-prstart,nlnk,*lnk; 80922e3da61SHong Zhang MatScalar *ca; 81022e3da61SHong Zhang PetscBT lnkbt; 81122e3da61SHong Zhang 81222e3da61SHong Zhang PetscFunctionBegin; 813*e240928fSHong Zhang int rank; 814*e240928fSHong Zhang ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 81522e3da61SHong Zhang 81622e3da61SHong Zhang /* Get ij structure of P[rstart:rend,:]^T */ 817e462e02cSHong Zhang ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr); 81822e3da61SHong Zhang ptJ=ptj; 81922e3da61SHong Zhang 82022e3da61SHong Zhang /* Allocate ci array, arrays for fill computation and */ 82122e3da61SHong Zhang /* free space for accumulating nonzero column info */ 82222e3da61SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 82322e3da61SHong Zhang ci[0] = 0; 82422e3da61SHong Zhang 82522e3da61SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 82622e3da61SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 82722e3da61SHong Zhang ptasparserow = ptadenserow + an; 82822e3da61SHong Zhang 82922e3da61SHong Zhang /* create and initialize a linked list */ 83022e3da61SHong Zhang nlnk = pn+1; 83122e3da61SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 83222e3da61SHong Zhang 83322e3da61SHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 83422e3da61SHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 83522e3da61SHong Zhang nnz = adi[am] + aoi[am]; 83622e3da61SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space); 83722e3da61SHong Zhang current_space = free_space; 83822e3da61SHong Zhang 83922e3da61SHong Zhang /* Determine symbolic info for each row of C: */ 84022e3da61SHong Zhang for (i=0;i<pn;i++) { 84122e3da61SHong Zhang ptnzi = pti[i+1] - pti[i]; 84222e3da61SHong Zhang ptanzi = 0; 84322e3da61SHong Zhang /* Determine symbolic row of PtA_reduced: */ 84422e3da61SHong Zhang for (j=0;j<ptnzi;j++) { 84522e3da61SHong Zhang arow = *ptJ++; 846*e240928fSHong Zhang if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); 84722e3da61SHong Zhang /* diagonal portion of A */ 84822e3da61SHong Zhang anzj = adi[arow+1] - adi[arow]; 84922e3da61SHong Zhang ajj = adj + adi[arow]; 85022e3da61SHong Zhang for (k=0;k<anzj;k++) { 85122e3da61SHong Zhang col = ajj[k]+prstart; 85222e3da61SHong Zhang if (!ptadenserow[col]) { 85322e3da61SHong Zhang ptadenserow[col] = -1; 85422e3da61SHong Zhang ptasparserow[ptanzi++] = col; 85522e3da61SHong Zhang } 85622e3da61SHong Zhang } 85722e3da61SHong Zhang /* off-diagonal portion of A */ 85822e3da61SHong Zhang anzj = aoi[arow+1] - aoi[arow]; 85922e3da61SHong Zhang ajj = aoj + aoi[arow]; 86022e3da61SHong Zhang for (k=0;k<anzj;k++) { 86122e3da61SHong Zhang col = a->garray[ajj[k]]; /* global col */ 86222e3da61SHong Zhang if (col < cstart){ 86322e3da61SHong Zhang col = ajj[k]; 86422e3da61SHong Zhang } else if (col >= cend){ 86522e3da61SHong Zhang col = ajj[k] + m; 86622e3da61SHong Zhang } else { 86722e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 86822e3da61SHong Zhang } 86922e3da61SHong Zhang if (!ptadenserow[col]) { 87022e3da61SHong Zhang ptadenserow[col] = -1; 87122e3da61SHong Zhang ptasparserow[ptanzi++] = col; 87222e3da61SHong Zhang } 87322e3da61SHong Zhang } 87422e3da61SHong Zhang } 875*e240928fSHong Zhang 876*e240928fSHong Zhang printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi); 87722e3da61SHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 87822e3da61SHong Zhang ptaj = ptasparserow; 87922e3da61SHong Zhang cnzi = 0; 88022e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 88122e3da61SHong Zhang prow = *ptaj++; 88222e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 88322e3da61SHong Zhang pjj = pj + pi[prow]; 88422e3da61SHong Zhang /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */ 88522e3da61SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 88622e3da61SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 88722e3da61SHong Zhang cnzi += nlnk; 88822e3da61SHong Zhang } 88922e3da61SHong Zhang 89022e3da61SHong Zhang /* If free space is not available, make more free space */ 89122e3da61SHong Zhang /* Double the amount of total space in the list */ 89222e3da61SHong Zhang if (current_space->local_remaining<cnzi) { 89322e3da61SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 89422e3da61SHong Zhang } 89522e3da61SHong Zhang 89622e3da61SHong Zhang /* Copy data into free space, and zero out denserows */ 89722e3da61SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 89822e3da61SHong Zhang current_space->array += cnzi; 89922e3da61SHong Zhang current_space->local_used += cnzi; 90022e3da61SHong Zhang current_space->local_remaining -= cnzi; 90122e3da61SHong Zhang 90222e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 90322e3da61SHong Zhang ptadenserow[ptasparserow[j]] = 0; 90422e3da61SHong Zhang } 90522e3da61SHong Zhang 90622e3da61SHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 90722e3da61SHong Zhang /* For now, we will recompute what is needed. */ 90822e3da61SHong Zhang ci[i+1] = ci[i] + cnzi; 90922e3da61SHong Zhang } 91022e3da61SHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 91122e3da61SHong Zhang /* Allocate space for cj, initialize cj, and */ 91222e3da61SHong Zhang /* destroy list of free space and other temporary array(s) */ 91322e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 91422e3da61SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 91522e3da61SHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 91622e3da61SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 91722e3da61SHong Zhang 91822e3da61SHong Zhang /* Allocate space for ca */ 91922e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 92022e3da61SHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 92122e3da61SHong Zhang 92222e3da61SHong Zhang /* put together the new matrix */ 92322e3da61SHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 92422e3da61SHong Zhang 92522e3da61SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 92622e3da61SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 92722e3da61SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 92822e3da61SHong Zhang c->freedata = PETSC_TRUE; 92922e3da61SHong Zhang c->nonew = 0; 93022e3da61SHong Zhang 93122e3da61SHong Zhang /* Clean up. */ 93222e3da61SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 93322e3da61SHong Zhang 93422e3da61SHong Zhang PetscFunctionReturn(0); 93522e3da61SHong Zhang } 93622e3da61SHong Zhang 937*e240928fSHong Zhang /*@C 938*e240928fSHong Zhang MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 939*e240928fSHong Zhang used by MatPtAP_MPIAIJ_MPIAIJ() 940*e240928fSHong Zhang 941*e240928fSHong Zhang Collective on Mat 942*e240928fSHong Zhang 943*e240928fSHong Zhang Input Parameters: 944*e240928fSHong Zhang + A - the matrix in mpiaij format 945*e240928fSHong Zhang . P - the projection matrix in seqaij format 946*e240928fSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 947*e240928fSHong Zhang . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 948*e240928fSHong Zhang . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 949*e240928fSHong Zhang 950*e240928fSHong Zhang Output Parameters: 951*e240928fSHong Zhang . C - the product matrix in seqaij format 952*e240928fSHong Zhang 953*e240928fSHong Zhang Level: developer 954*e240928fSHong Zhang 955*e240928fSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 956*e240928fSHong Zhang @*/ 9570390132cSHong Zhang static PetscEvent logkey_matptapreducedpt = 0; 95822e3da61SHong Zhang #undef __FUNCT__ 9590390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ" 9600390132cSHong Zhang PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 96122e3da61SHong Zhang { 96222e3da61SHong Zhang PetscErrorCode ierr; 96322e3da61SHong Zhang 96422e3da61SHong Zhang PetscFunctionBegin; 96522e3da61SHong Zhang if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 96622e3da61SHong Zhang if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 9670390132cSHong Zhang if (!logkey_matptapreducedpt) { 9680390132cSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE); 969e462e02cSHong Zhang } 9700390132cSHong Zhang PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 97122e3da61SHong Zhang 97222e3da61SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9730390132cSHong Zhang ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 97422e3da61SHong Zhang } 9750390132cSHong Zhang ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr); 9760390132cSHong Zhang ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 97722e3da61SHong Zhang 97822e3da61SHong Zhang PetscFunctionReturn(0); 97922e3da61SHong Zhang } 98022e3da61SHong Zhang 981*e240928fSHong Zhang /*@C 982*e240928fSHong Zhang MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 983*e240928fSHong Zhang of the off-diagonal portion of A 984*e240928fSHong Zhang 985*e240928fSHong Zhang Collective on Mat 986*e240928fSHong Zhang 987*e240928fSHong Zhang Input Parameters: 988*e240928fSHong Zhang + A,B - the matrices in mpiaij format 989*e240928fSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 990*e240928fSHong Zhang - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 991*e240928fSHong Zhang 992*e240928fSHong Zhang Output Parameter: 993*e240928fSHong Zhang + rowb, colb - index sets of rows and columns of B to extract 994*e240928fSHong Zhang . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 995*e240928fSHong Zhang - B_seq - the sequential matrix generated 996*e240928fSHong Zhang 997*e240928fSHong Zhang Level: developer 998*e240928fSHong Zhang 999*e240928fSHong Zhang @*/ 1000*e240928fSHong Zhang static PetscEvent logkey_GetBrowsOfAocols = 0; 1001*e240928fSHong Zhang #undef __FUNCT__ 1002*e240928fSHong Zhang #define __FUNCT__ "MatGetBrowsOfAoCols_Private" 1003*e240928fSHong Zhang PetscErrorCode MatGetBrowsOfAoCols_Private(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 1004*e240928fSHong Zhang { 1005*e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 1006*e240928fSHong Zhang PetscErrorCode ierr; 1007*e240928fSHong Zhang PetscInt *idx,i,start,ncols,nzB,*cmap,imark; 1008*e240928fSHong Zhang IS isrowb,iscolb; 1009*e240928fSHong Zhang Mat *bseq; 1010*e240928fSHong Zhang 1011*e240928fSHong Zhang PetscFunctionBegin; 1012*e240928fSHong Zhang if (a->cstart != b->rstart || a->cend != b->rend){ 1013*e240928fSHong Zhang SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 1014*e240928fSHong Zhang } 1015*e240928fSHong Zhang if (!logkey_GetBrowsOfAocols) { 1016*e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE); 1017*e240928fSHong Zhang } 1018*e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 1019*e240928fSHong Zhang 1020*e240928fSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1021*e240928fSHong Zhang start = a->cstart; 1022*e240928fSHong Zhang cmap = a->garray; 1023*e240928fSHong Zhang nzB = a->B->n; 1024*e240928fSHong Zhang if (!nzB){ /* output a null matrix */ 1025*e240928fSHong Zhang B_seq = PETSC_NULL; 1026*e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 1027*e240928fSHong Zhang PetscFunctionReturn(0); 1028*e240928fSHong Zhang } 1029*e240928fSHong Zhang ierr = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr); 1030*e240928fSHong Zhang ncols = 0; 1031*e240928fSHong Zhang for (i=0; i<nzB; i++) { /* row < local row index */ 1032*e240928fSHong Zhang if (cmap[i] < start) idx[ncols++] = cmap[i]; 1033*e240928fSHong Zhang else break; 1034*e240928fSHong Zhang } 1035*e240928fSHong Zhang imark = i; 1036*e240928fSHong Zhang /* for (i=0; i<nzA; i++) idx[ncols++] = start + i; */ /* local rows */ 1037*e240928fSHong Zhang for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 1038*e240928fSHong Zhang ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 1039*e240928fSHong Zhang ierr = PetscFree(idx);CHKERRQ(ierr); 1040*e240928fSHong Zhang *brstart = imark; 1041*e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 1042*e240928fSHong Zhang } else { 1043*e240928fSHong Zhang if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 1044*e240928fSHong Zhang isrowb = *rowb; iscolb = *colb; 1045*e240928fSHong Zhang ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 1046*e240928fSHong Zhang bseq[0] = *B_seq; 1047*e240928fSHong Zhang } 1048*e240928fSHong Zhang ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 1049*e240928fSHong Zhang *B_seq = bseq[0]; 1050*e240928fSHong Zhang ierr = PetscFree(bseq);CHKERRQ(ierr); 1051*e240928fSHong Zhang if (!rowb){ 1052*e240928fSHong Zhang ierr = ISDestroy(isrowb);CHKERRQ(ierr); 1053*e240928fSHong Zhang } else { 1054*e240928fSHong Zhang *rowb = isrowb; 1055*e240928fSHong Zhang } 1056*e240928fSHong Zhang if (!colb){ 1057*e240928fSHong Zhang ierr = ISDestroy(iscolb);CHKERRQ(ierr); 1058*e240928fSHong Zhang } else { 1059*e240928fSHong Zhang *colb = iscolb; 1060*e240928fSHong Zhang } 1061*e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 1062*e240928fSHong Zhang PetscFunctionReturn(0); 1063*e240928fSHong Zhang } 1064*e240928fSHong Zhang 1065*e240928fSHong Zhang /* ------------- New --------------------*/ 1066*e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 1067*e240928fSHong Zhang #undef __FUNCT__ 1068*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 1069*e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 1070*e240928fSHong Zhang { 1071*e240928fSHong Zhang PetscErrorCode ierr; 1072*e240928fSHong Zhang PetscInt flops=0; 1073*e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1074*e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 1075*e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 1076*e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1077*e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1078*e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 1079*e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 1080*e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 1081*e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 1082*e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1083*e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 1084*e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 1085*e240928fSHong Zhang 1086*e240928fSHong Zhang PetscFunctionBegin; 1087*e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 1088*e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1089*e240928fSHong Zhang } 1090*e240928fSHong Zhang PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1091*e240928fSHong Zhang 1092*e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 1093*e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1094*e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1095*e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 1096*e240928fSHong Zhang apjdense = apj + cn; 1097*e240928fSHong Zhang 1098*e240928fSHong Zhang /* Clear old values in C */ 1099*e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1100*e240928fSHong Zhang 1101*e240928fSHong Zhang for (i=0;i<am;i++) { 1102*e240928fSHong Zhang /* Form i-th sparse row of A*P */ 1103*e240928fSHong Zhang apnzj = 0; 1104*e240928fSHong Zhang /* diagonal portion of A */ 1105*e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 1106*e240928fSHong Zhang for (j=0;j<anzi;j++) { 1107*e240928fSHong Zhang prow = *adj; 1108*e240928fSHong Zhang adj++; 1109*e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1110*e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1111*e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 1112*e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1113*e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1114*e240928fSHong Zhang apjdense[pjj[k]] = -1; 1115*e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1116*e240928fSHong Zhang } 1117*e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 1118*e240928fSHong Zhang } 1119*e240928fSHong Zhang flops += 2*pnzj; 1120*e240928fSHong Zhang ada++; 1121*e240928fSHong Zhang } 1122*e240928fSHong Zhang /* off-diagonal portion of A */ 1123*e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 1124*e240928fSHong Zhang for (j=0;j<anzi;j++) { 1125*e240928fSHong Zhang col = a->garray[*aoj]; 1126*e240928fSHong Zhang if (col < cstart){ 1127*e240928fSHong Zhang prow = *aoj; 1128*e240928fSHong Zhang } else if (col >= cend){ 1129*e240928fSHong Zhang prow = *aoj; 1130*e240928fSHong Zhang } else { 1131*e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1132*e240928fSHong Zhang } 1133*e240928fSHong Zhang aoj++; 1134*e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1135*e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1136*e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 1137*e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1138*e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1139*e240928fSHong Zhang apjdense[pjj[k]] = -1; 1140*e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1141*e240928fSHong Zhang } 1142*e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 1143*e240928fSHong Zhang } 1144*e240928fSHong Zhang flops += 2*pnzj; 1145*e240928fSHong Zhang aoa++; 1146*e240928fSHong Zhang } 1147*e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 1148*e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1149*e240928fSHong Zhang 1150*e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1151*e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 1152*e240928fSHong Zhang for (j=0;j<pnzi;j++) { 1153*e240928fSHong Zhang nextap = 0; 1154*e240928fSHong Zhang crow = *pJ++; 1155*e240928fSHong Zhang cjj = cj + ci[crow]; 1156*e240928fSHong Zhang caj = ca + ci[crow]; 1157*e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 1158*e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 1159*e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 1160*e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 1161*e240928fSHong Zhang } 1162*e240928fSHong Zhang } 1163*e240928fSHong Zhang flops += 2*apnzj; 1164*e240928fSHong Zhang pA++; 1165*e240928fSHong Zhang } 1166*e240928fSHong Zhang 1167*e240928fSHong Zhang /* Zero the current row info for A*P */ 1168*e240928fSHong Zhang for (j=0;j<apnzj;j++) { 1169*e240928fSHong Zhang apa[apj[j]] = 0.; 1170*e240928fSHong Zhang apjdense[apj[j]] = 0; 1171*e240928fSHong Zhang } 1172*e240928fSHong Zhang } 1173*e240928fSHong Zhang 1174*e240928fSHong Zhang /* Assemble the final matrix and clean up */ 1175*e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1176*e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1177*e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 1178*e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1179*e240928fSHong Zhang PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1180*e240928fSHong Zhang PetscFunctionReturn(0); 1181*e240928fSHong Zhang } 1182*e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 1183*e240928fSHong Zhang #undef __FUNCT__ 1184*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1185*e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1186*e240928fSHong Zhang { 1187*e240928fSHong Zhang PetscErrorCode ierr; 1188*e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1189*e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1190*e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1191*e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1192*e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1193*e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 1194*e240928fSHong Zhang PetscInt an=A->N,am=A->m,pN=P_loc->N; 1195*e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1196*e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 1197*e240928fSHong Zhang MatScalar *ca; 1198*e240928fSHong Zhang PetscBT lnkbt; 1199*e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 1200*e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1201*e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1202*e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1203*e240928fSHong Zhang /* PetscInt rank; */ 1204*e240928fSHong Zhang 1205*e240928fSHong Zhang PetscFunctionBegin; 1206*e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 1207*e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1208*e240928fSHong Zhang } 1209*e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1210*e240928fSHong Zhang 1211*e240928fSHong Zhang /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */ 1212*e240928fSHong Zhang prend = prstart + pm; 1213*e240928fSHong Zhang 1214*e240928fSHong Zhang /* get ij structure of P_loc^T */ 1215*e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1216*e240928fSHong Zhang ptJ=ptj; 1217*e240928fSHong Zhang 1218*e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 1219*e240928fSHong Zhang /* free space for accumulating nonzero column info */ 1220*e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1221*e240928fSHong Zhang ci[0] = 0; 1222*e240928fSHong Zhang 1223*e240928fSHong Zhang ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1224*e240928fSHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1225*e240928fSHong Zhang ptasparserow_loc = ptadenserow_loc + an; 1226*e240928fSHong Zhang ptadenserow_oth = ptasparserow_loc + an; 1227*e240928fSHong Zhang ptasparserow_oth = ptadenserow_oth + an; 1228*e240928fSHong Zhang 1229*e240928fSHong Zhang /* create and initialize a linked list */ 1230*e240928fSHong Zhang nlnk = pN+1; 1231*e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1232*e240928fSHong Zhang 1233*e240928fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 1234*e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1235*e240928fSHong Zhang nnz = adi[am] + aoi[am]; 1236*e240928fSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 1237*e240928fSHong Zhang current_space = free_space; 1238*e240928fSHong Zhang 1239*e240928fSHong Zhang /* determine symbolic info for each row of C: */ 1240*e240928fSHong Zhang for (i=0;i<pN;i++) { 1241*e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 1242*e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 1243*e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 1244*e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 1245*e240928fSHong Zhang arow = *ptJ++; 1246*e240928fSHong Zhang /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */ 1247*e240928fSHong Zhang /* diagonal portion of A */ 1248*e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 1249*e240928fSHong Zhang ajj = adj + adi[arow]; 1250*e240928fSHong Zhang for (k=0;k<anzj;k++) { 1251*e240928fSHong Zhang col = ajj[k]+prstart; 1252*e240928fSHong Zhang if (!ptadenserow_loc[col]) { 1253*e240928fSHong Zhang ptadenserow_loc[col] = -1; 1254*e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 1255*e240928fSHong Zhang } 1256*e240928fSHong Zhang } 1257*e240928fSHong Zhang /* off-diagonal portion of A */ 1258*e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 1259*e240928fSHong Zhang ajj = aoj + aoi[arow]; 1260*e240928fSHong Zhang for (k=0;k<anzj;k++) { 1261*e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 1262*e240928fSHong Zhang if (col < cstart){ 1263*e240928fSHong Zhang col = ajj[k]; 1264*e240928fSHong Zhang } else if (col >= cend){ 1265*e240928fSHong Zhang col = ajj[k] + pm; 1266*e240928fSHong Zhang } else { 1267*e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1268*e240928fSHong Zhang } 1269*e240928fSHong Zhang if (!ptadenserow_oth[col]) { 1270*e240928fSHong Zhang ptadenserow_oth[col] = -1; 1271*e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 1272*e240928fSHong Zhang } 1273*e240928fSHong Zhang } 1274*e240928fSHong Zhang } 1275*e240928fSHong Zhang 1276*e240928fSHong Zhang /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */ 1277*e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1278*e240928fSHong Zhang cnzi = 0; 1279*e240928fSHong Zhang /* rows of P_loc */ 1280*e240928fSHong Zhang ptaj = ptasparserow_loc; 1281*e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 1282*e240928fSHong Zhang prow = *ptaj++; 1283*e240928fSHong Zhang prow -= prstart; /* rm */ 1284*e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1285*e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1286*e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1287*e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1288*e240928fSHong Zhang cnzi += nlnk; 1289*e240928fSHong Zhang } 1290*e240928fSHong Zhang /* rows of P_oth */ 1291*e240928fSHong Zhang ptaj = ptasparserow_oth; 1292*e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 1293*e240928fSHong Zhang prow = *ptaj++; 1294*e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 1295*e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1296*e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1297*e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1298*e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1299*e240928fSHong Zhang cnzi += nlnk; 1300*e240928fSHong Zhang } 1301*e240928fSHong Zhang 1302*e240928fSHong Zhang /* If free space is not available, make more free space */ 1303*e240928fSHong Zhang /* Double the amount of total space in the list */ 1304*e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 1305*e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1306*e240928fSHong Zhang } 1307*e240928fSHong Zhang 1308*e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 1309*e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1310*e240928fSHong Zhang current_space->array += cnzi; 1311*e240928fSHong Zhang current_space->local_used += cnzi; 1312*e240928fSHong Zhang current_space->local_remaining -= cnzi; 1313*e240928fSHong Zhang 1314*e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 1315*e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 1316*e240928fSHong Zhang } 1317*e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 1318*e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 1319*e240928fSHong Zhang } 1320*e240928fSHong Zhang 1321*e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1322*e240928fSHong Zhang /* For now, we will recompute what is needed. */ 1323*e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 1324*e240928fSHong Zhang } 1325*e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1326*e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 1327*e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 1328*e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1329*e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1330*e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1331*e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1332*e240928fSHong Zhang 1333*e240928fSHong Zhang /* Allocate space for ca */ 1334*e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1335*e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1336*e240928fSHong Zhang 1337*e240928fSHong Zhang /* put together the new matrix */ 1338*e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1339*e240928fSHong Zhang 1340*e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1341*e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1342*e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 1343*e240928fSHong Zhang c->freedata = PETSC_TRUE; 1344*e240928fSHong Zhang c->nonew = 0; 1345*e240928fSHong Zhang 1346*e240928fSHong Zhang /* Clean up. */ 1347*e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1348*e240928fSHong Zhang /* 1349*e240928fSHong Zhang ierr = MPI_Barrier(PETSC_COMM_WORLD); 1350*e240928fSHong Zhang if (rank == 1){ 1351*e240928fSHong Zhang printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N); 1352*e240928fSHong Zhang ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 1353*e240928fSHong Zhang } */ 1354*e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1355*e240928fSHong Zhang PetscFunctionReturn(0); 1356*e240928fSHong Zhang } 1357