1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 740390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*); 750390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*); 760390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat); 7722e3da61SHong Zhang 78e240928fSHong Zhang 79e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 80e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 81e240928fSHong Zhang 82eb9c0419SKris Buschelman #undef __FUNCT__ 83ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 84ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 85ff134f7aSHong Zhang { 86ff134f7aSHong Zhang PetscErrorCode ierr; 87b90dcfe3SHong Zhang 88b90dcfe3SHong Zhang PetscFunctionBegin; 89b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 904c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 91b90dcfe3SHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 924c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 93e240928fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */ 944c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 95e240928fSHong Zhang ierr = MatDestroy(*C);CHKERRQ(ierr); 96e240928fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr); 97e240928fSHong Zhang 98e240928fSHong Zhang /* 99b90dcfe3SHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 100e240928fSHong Zhang */ 1014c627768SHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 102b90dcfe3SHong Zhang } else { 103b90dcfe3SHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 104b90dcfe3SHong Zhang } 105b90dcfe3SHong Zhang PetscFunctionReturn(0); 106b90dcfe3SHong Zhang } 107e240928fSHong Zhang #define NEW 108b90dcfe3SHong Zhang #undef __FUNCT__ 109e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 110b90dcfe3SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 111b90dcfe3SHong Zhang { 112b90dcfe3SHong Zhang PetscErrorCode ierr; 113*429d309bSHong Zhang Mat C_seq; 114e240928fSHong Zhang Mat P_loc,P_oth,*ploc; 115e240928fSHong Zhang PetscInt prstart,prend,m=P->m,low; 116e240928fSHong Zhang IS isrow,iscol; 117ff134f7aSHong Zhang 118ff134f7aSHong Zhang PetscFunctionBegin; 119e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 120*429d309bSHong Zhang ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr); 121e240928fSHong Zhang prend = prstart + m; 122e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 123e240928fSHong Zhang ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 124e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 125e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 126e240928fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr); 127e240928fSHong Zhang P_loc = ploc[0]; 128e240928fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 129e240928fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 130e240928fSHong Zhang 131e240928fSHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 132e240928fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr); 133e240928fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr); 134e240928fSHong Zhang 135e240928fSHong Zhang ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr); 136e240928fSHong Zhang ierr = MatDestroy(P_oth);CHKERRQ(ierr); 137e240928fSHong Zhang #ifdef OLD 138*429d309bSHong Zhang Mat P_seq; 13925616d81SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 14022e3da61SHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 14125616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 142ff134f7aSHong Zhang prend = prstart + m; 1430390132cSHong Zhang ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 14425616d81SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 145e240928fSHong Zhang #endif 146b90dcfe3SHong Zhang /* add C_seq into mpi C */ 147b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 148b90dcfe3SHong Zhang 149ff134f7aSHong Zhang PetscFunctionReturn(0); 150ff134f7aSHong Zhang } 151ff134f7aSHong Zhang 152ff134f7aSHong Zhang #undef __FUNCT__ 153e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 154b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 155ff134f7aSHong Zhang { 156b90dcfe3SHong Zhang PetscErrorCode ierr; 1570390132cSHong Zhang Mat P_seq,C_seq; 158b1d57f15SBarry Smith PetscInt prstart,prend,m=P->m; 159671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 160671beff6SHong Zhang PetscObjectContainer container; 161ff134f7aSHong Zhang 162ff134f7aSHong Zhang PetscFunctionBegin; 163671beff6SHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 164671beff6SHong Zhang if (container) { 1657f79fd58SMatthew Knepley ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 1667f79fd58SMatthew Knepley } else { 1677f79fd58SMatthew Knepley SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container"); 168671beff6SHong Zhang } 169671beff6SHong Zhang 170b90dcfe3SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 1710390132cSHong Zhang ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr); 172ff134f7aSHong Zhang 173b90dcfe3SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 174b90dcfe3SHong Zhang prend = prstart + m; 175b90dcfe3SHong Zhang C_seq = merge->C_seq; 1760390132cSHong Zhang ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr); 177b90dcfe3SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 178b90dcfe3SHong Zhang 179b90dcfe3SHong Zhang /* add C_seq into mpi C */ 180b90dcfe3SHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 181b90dcfe3SHong Zhang 182ff134f7aSHong Zhang PetscFunctionReturn(0); 183ff134f7aSHong Zhang } 184ff134f7aSHong Zhang 185ff134f7aSHong Zhang #undef __FUNCT__ 1869af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 187dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1889af31e4aSHong Zhang { 189dfbe8321SBarry Smith PetscErrorCode ierr; 190b1d57f15SBarry Smith 1919af31e4aSHong Zhang PetscFunctionBegin; 1929af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 193d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1949af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 195d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1969af31e4aSHong Zhang } 197d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1989af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 199d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2009af31e4aSHong Zhang PetscFunctionReturn(0); 2019af31e4aSHong Zhang } 2029af31e4aSHong Zhang 2036849ba73SBarry Smith /* 2049af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2054d3841fdSKris Buschelman 2064d3841fdSKris Buschelman Collective on Mat 2074d3841fdSKris Buschelman 2084d3841fdSKris Buschelman Input Parameters: 2094d3841fdSKris Buschelman + A - the matrix 2104d3841fdSKris Buschelman - P - the projection matrix 2114d3841fdSKris Buschelman 2124d3841fdSKris Buschelman Output Parameters: 2134d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2144d3841fdSKris Buschelman 2154d3841fdSKris Buschelman Notes: 2164d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2174d3841fdSKris Buschelman 2184d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2194d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2209af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2214d3841fdSKris Buschelman 2224d3841fdSKris Buschelman Level: intermediate 2234d3841fdSKris Buschelman 2249af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2256849ba73SBarry Smith */ 226e240928fSHong Zhang #undef __FUNCT__ 227e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 22855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 22955a3bba9SHong Zhang { 230dfbe8321SBarry Smith PetscErrorCode ierr; 231534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 232534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 233eb9c0419SKris Buschelman 234eb9c0419SKris Buschelman PetscFunctionBegin; 235eb9c0419SKris Buschelman 2364482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 237c9780b6fSBarry Smith PetscValidType(A,1); 238eb9c0419SKris Buschelman MatPreallocated(A); 239eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 240eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 241eb9c0419SKris Buschelman 2424482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 243c9780b6fSBarry Smith PetscValidType(P,2); 244eb9c0419SKris Buschelman MatPreallocated(P); 245eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 246eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 247eb9c0419SKris Buschelman 2484482741eSBarry Smith PetscValidPointer(C,3); 2494482741eSBarry Smith 250eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 251eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 252eb9c0419SKris Buschelman 253534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 254534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 255534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 256534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 257534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 258534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 259534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2604d3841fdSKris Buschelman 261534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 262534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 263534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 264eb9c0419SKris Buschelman 265eb9c0419SKris Buschelman PetscFunctionReturn(0); 266eb9c0419SKris Buschelman } 267eb9c0419SKris Buschelman 268f747e1aeSHong Zhang typedef struct { 269f747e1aeSHong Zhang Mat symAP; 270f747e1aeSHong Zhang } Mat_PtAPstruct; 271f747e1aeSHong Zhang 27278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 27378a80504SBarry Smith 274f747e1aeSHong Zhang #undef __FUNCT__ 275f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 276f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 277f747e1aeSHong Zhang { 278f4a850bbSBarry Smith PetscErrorCode ierr; 279f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 280f747e1aeSHong Zhang 281f747e1aeSHong Zhang PetscFunctionBegin; 282f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 283f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 28478a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 285f747e1aeSHong Zhang PetscFunctionReturn(0); 286f747e1aeSHong Zhang } 287f747e1aeSHong Zhang 288eb9c0419SKris Buschelman #undef __FUNCT__ 2899af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 29055a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 29155a3bba9SHong Zhang { 292dfbe8321SBarry Smith PetscErrorCode ierr; 293d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 294d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 29555a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 296b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 29755a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 298b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 299d20bfe6fSHong Zhang MatScalar *ca; 30055a3bba9SHong Zhang PetscBT lnkbt; 301eb9c0419SKris Buschelman 302eb9c0419SKris Buschelman PetscFunctionBegin; 303d20bfe6fSHong Zhang /* Get ij structure of P^T */ 304eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 305d20bfe6fSHong Zhang ptJ=ptj; 306eb9c0419SKris Buschelman 307d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 308d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 30955a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 310d20bfe6fSHong Zhang ci[0] = 0; 311eb9c0419SKris Buschelman 31255a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 31355a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 314d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 31555a3bba9SHong Zhang 31655a3bba9SHong Zhang /* create and initialize a linked list */ 31755a3bba9SHong Zhang nlnk = pn+1; 31855a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 319eb9c0419SKris Buschelman 320d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 321d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 322d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 323d20bfe6fSHong Zhang current_space = free_space; 324d20bfe6fSHong Zhang 325d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 326d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 327d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 328d20bfe6fSHong Zhang ptanzi = 0; 329d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 330d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 331d20bfe6fSHong Zhang arow = *ptJ++; 332d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 333d20bfe6fSHong Zhang ajj = aj + ai[arow]; 334d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 335d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 336d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 337d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 338d20bfe6fSHong Zhang } 339d20bfe6fSHong Zhang } 340d20bfe6fSHong Zhang } 341d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 342d20bfe6fSHong Zhang ptaj = ptasparserow; 343d20bfe6fSHong Zhang cnzi = 0; 344d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 345d20bfe6fSHong Zhang prow = *ptaj++; 346d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 347d20bfe6fSHong Zhang pjj = pj + pi[prow]; 34855a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 34955a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 35055a3bba9SHong Zhang cnzi += nlnk; 351d20bfe6fSHong Zhang } 352d20bfe6fSHong Zhang 353d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 354d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 355d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 356d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 357d20bfe6fSHong Zhang } 358d20bfe6fSHong Zhang 359d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 36055a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 361d20bfe6fSHong Zhang current_space->array += cnzi; 362d20bfe6fSHong Zhang current_space->local_used += cnzi; 363d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 364d20bfe6fSHong Zhang 365d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 366d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 367d20bfe6fSHong Zhang } 368d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 369d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 370d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 371d20bfe6fSHong Zhang } 372d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 373d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 374d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 37555a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 376d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 377d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 37855a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 379d20bfe6fSHong Zhang 380d20bfe6fSHong Zhang /* Allocate space for ca */ 381d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 382d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 383d20bfe6fSHong Zhang 384d20bfe6fSHong Zhang /* put together the new matrix */ 385d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 386d20bfe6fSHong Zhang 387d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 388d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 389d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 390d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 391d20bfe6fSHong Zhang c->nonew = 0; 392d20bfe6fSHong Zhang 393d20bfe6fSHong Zhang /* Clean up. */ 394d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 395eb9c0419SKris Buschelman 396eb9c0419SKris Buschelman PetscFunctionReturn(0); 397eb9c0419SKris Buschelman } 398eb9c0419SKris Buschelman 3993985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 4003985e5eaSKris Buschelman EXTERN_C_BEGIN 4013985e5eaSKris Buschelman #undef __FUNCT__ 4029af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 40355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 40455a3bba9SHong Zhang { 4055c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 406dfbe8321SBarry Smith PetscErrorCode ierr; 4073985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4083985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4093985e5eaSKris Buschelman Mat P=pp->AIJ; 4103985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 411b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 412b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 413b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 414b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4153985e5eaSKris Buschelman MatScalar *ca; 4163985e5eaSKris Buschelman 4173985e5eaSKris Buschelman PetscFunctionBegin; 4183985e5eaSKris Buschelman /* Start timer */ 4199af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4203985e5eaSKris Buschelman 4213985e5eaSKris Buschelman /* Get ij structure of P^T */ 4223985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4233985e5eaSKris Buschelman 4243985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4253985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 426b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4273985e5eaSKris Buschelman ci[0] = 0; 4283985e5eaSKris Buschelman 429b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 430b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4313985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4323985e5eaSKris Buschelman denserow = ptasparserow + an; 4333985e5eaSKris Buschelman sparserow = denserow + pn; 4343985e5eaSKris Buschelman 4353985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4363985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4373985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4383985e5eaSKris Buschelman current_space = free_space; 4393985e5eaSKris Buschelman 4403985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4413985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4423985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4433985e5eaSKris Buschelman ptanzi = 0; 4443985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4453985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4463985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4473985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4483985e5eaSKris Buschelman arow = ptJ[j] + dof; 4493985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4503985e5eaSKris Buschelman ajj = aj + ai[arow]; 4513985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4523985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4533985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4543985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4553985e5eaSKris Buschelman } 4563985e5eaSKris Buschelman } 4573985e5eaSKris Buschelman } 4583985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4593985e5eaSKris Buschelman ptaj = ptasparserow; 4603985e5eaSKris Buschelman cnzi = 0; 4613985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 462fe05a634SKris Buschelman pdof = *ptaj%dof; 4633985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4643985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4653985e5eaSKris Buschelman pjj = pj + pi[prow]; 4663985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 467fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 468fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 469fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4703985e5eaSKris Buschelman } 4713985e5eaSKris Buschelman } 4723985e5eaSKris Buschelman } 4733985e5eaSKris Buschelman 4743985e5eaSKris Buschelman /* sort sparserow */ 4753985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 4763985e5eaSKris Buschelman 4773985e5eaSKris Buschelman /* If free space is not available, make more free space */ 4783985e5eaSKris Buschelman /* Double the amount of total space in the list */ 4793985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 4803985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4813985e5eaSKris Buschelman } 4823985e5eaSKris Buschelman 4833985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 484b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 4853985e5eaSKris Buschelman current_space->array += cnzi; 4863985e5eaSKris Buschelman current_space->local_used += cnzi; 4873985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 4883985e5eaSKris Buschelman 4893985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 4903985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 4913985e5eaSKris Buschelman } 4923985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 4933985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 4943985e5eaSKris Buschelman } 4953985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 4963985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 4973985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 4983985e5eaSKris Buschelman } 4993985e5eaSKris Buschelman } 5003985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 5013985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 5023985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 503b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5043985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5053985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5063985e5eaSKris Buschelman 5073985e5eaSKris Buschelman /* Allocate space for ca */ 5083985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5093985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5103985e5eaSKris Buschelman 5113985e5eaSKris Buschelman /* put together the new matrix */ 5123985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5133985e5eaSKris Buschelman 5143985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5153985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5163985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5173985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5183985e5eaSKris Buschelman c->nonew = 0; 5193985e5eaSKris Buschelman 5203985e5eaSKris Buschelman /* Clean up. */ 5213985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5223985e5eaSKris Buschelman 5239af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5243985e5eaSKris Buschelman PetscFunctionReturn(0); 5253985e5eaSKris Buschelman } 5263985e5eaSKris Buschelman EXTERN_C_END 5273985e5eaSKris Buschelman 5286849ba73SBarry Smith /* 5299af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5304d3841fdSKris Buschelman 5314d3841fdSKris Buschelman Collective on Mat 5324d3841fdSKris Buschelman 5334d3841fdSKris Buschelman Input Parameters: 5344d3841fdSKris Buschelman + A - the matrix 5354d3841fdSKris Buschelman - P - the projection matrix 5364d3841fdSKris Buschelman 5374d3841fdSKris Buschelman Output Parameters: 5384d3841fdSKris Buschelman . C - the product matrix 5394d3841fdSKris Buschelman 5404d3841fdSKris Buschelman Notes: 5419af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5424d3841fdSKris Buschelman the user using MatDeatroy(). 5434d3841fdSKris Buschelman 544170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 545170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5464d3841fdSKris Buschelman 5474d3841fdSKris Buschelman Level: intermediate 5484d3841fdSKris Buschelman 5499af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5506849ba73SBarry Smith */ 551e240928fSHong Zhang #undef __FUNCT__ 552e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 55355a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 55455a3bba9SHong Zhang { 555dfbe8321SBarry Smith PetscErrorCode ierr; 556534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 557534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 558eb9c0419SKris Buschelman 559eb9c0419SKris Buschelman PetscFunctionBegin; 560eb9c0419SKris Buschelman 5614482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 562c9780b6fSBarry Smith PetscValidType(A,1); 563eb9c0419SKris Buschelman MatPreallocated(A); 564eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 565eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 566eb9c0419SKris Buschelman 5674482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 568c9780b6fSBarry Smith PetscValidType(P,2); 569eb9c0419SKris Buschelman MatPreallocated(P); 570eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 571eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 572eb9c0419SKris Buschelman 5734482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 574c9780b6fSBarry Smith PetscValidType(C,3); 575eb9c0419SKris Buschelman MatPreallocated(C); 576eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 577eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 578eb9c0419SKris Buschelman 579eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 580eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 581eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 582eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 583eb9c0419SKris Buschelman 584534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 585534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 586534c1384SKris Buschelman fA = A->ops->ptapnumeric; 587534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 588534c1384SKris Buschelman fP = P->ops->ptapnumeric; 589534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 590534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5914d3841fdSKris Buschelman 592534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 593534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 594534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 595eb9c0419SKris Buschelman 596eb9c0419SKris Buschelman PetscFunctionReturn(0); 597eb9c0419SKris Buschelman } 598eb9c0419SKris Buschelman 599eb9c0419SKris Buschelman #undef __FUNCT__ 6009af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 601dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 602dfbe8321SBarry Smith { 603dfbe8321SBarry Smith PetscErrorCode ierr; 604b1d57f15SBarry Smith PetscInt flops=0; 605d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 606d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 607d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 608b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 609b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 610b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 611b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 612d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 613eb9c0419SKris Buschelman 614eb9c0419SKris Buschelman PetscFunctionBegin; 615d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 616b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 617b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 618eb9c0419SKris Buschelman 619b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 620d20bfe6fSHong Zhang apjdense = apj + cn; 621d20bfe6fSHong Zhang 622d20bfe6fSHong Zhang /* Clear old values in C */ 623d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 624d20bfe6fSHong Zhang 625d20bfe6fSHong Zhang for (i=0;i<am;i++) { 626d20bfe6fSHong Zhang /* Form sparse row of A*P */ 627d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 628d20bfe6fSHong Zhang apnzj = 0; 629d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 630d20bfe6fSHong Zhang prow = *aj++; 631d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 632d20bfe6fSHong Zhang pjj = pj + pi[prow]; 633d20bfe6fSHong Zhang paj = pa + pi[prow]; 634d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 635d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 636d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 637d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 638d20bfe6fSHong Zhang } 639d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 640d20bfe6fSHong Zhang } 641d20bfe6fSHong Zhang flops += 2*pnzj; 642d20bfe6fSHong Zhang aa++; 643d20bfe6fSHong Zhang } 644d20bfe6fSHong Zhang 645d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 646d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 647d20bfe6fSHong Zhang 648d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 649d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 650d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 651d20bfe6fSHong Zhang nextap = 0; 652d20bfe6fSHong Zhang crow = *pJ++; 653d20bfe6fSHong Zhang cjj = cj + ci[crow]; 654d20bfe6fSHong Zhang caj = ca + ci[crow]; 655d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 656d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 657d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 658d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 659d20bfe6fSHong Zhang } 660d20bfe6fSHong Zhang } 661d20bfe6fSHong Zhang flops += 2*apnzj; 662d20bfe6fSHong Zhang pA++; 663d20bfe6fSHong Zhang } 664d20bfe6fSHong Zhang 665d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 666d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 667d20bfe6fSHong Zhang apa[apj[j]] = 0.; 668d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 669d20bfe6fSHong Zhang } 670d20bfe6fSHong Zhang } 671d20bfe6fSHong Zhang 672d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 673d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 674d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 675d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 676d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 677d20bfe6fSHong Zhang 678eb9c0419SKris Buschelman PetscFunctionReturn(0); 679eb9c0419SKris Buschelman } 6800e36024fSHong Zhang 6810390132cSHong Zhang /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 6820e36024fSHong Zhang 6830e36024fSHong Zhang #undef __FUNCT__ 6840390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ" 6850390132cSHong Zhang PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C) 68622e3da61SHong Zhang { 68722e3da61SHong Zhang PetscErrorCode ierr; 68822e3da61SHong Zhang PetscInt flops=0; 68922e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 69022e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 69122e3da61SHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 69222e3da61SHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 69322e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 69422e3da61SHong Zhang PetscInt *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 69522e3da61SHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 69622e3da61SHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 69722e3da61SHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 69822e3da61SHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 69922e3da61SHong Zhang 70022e3da61SHong Zhang PetscFunctionBegin; 70122e3da61SHong Zhang pA=p->a+pi[prstart]; 70222e3da61SHong Zhang /* Allocate temporary array for storage of one row of A*P */ 70322e3da61SHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 70422e3da61SHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 70522e3da61SHong Zhang 70622e3da61SHong Zhang apj = (PetscInt *)(apa + cn); 70722e3da61SHong Zhang apjdense = apj + cn; 70822e3da61SHong Zhang 70922e3da61SHong Zhang /* Clear old values in C */ 71022e3da61SHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 71122e3da61SHong Zhang 71222e3da61SHong Zhang for (i=0;i<am;i++) { 71322e3da61SHong Zhang /* Form sparse row of A*P */ 71422e3da61SHong Zhang /* diagonal portion of A */ 71522e3da61SHong Zhang anzi = adi[i+1] - adi[i]; 71622e3da61SHong Zhang apnzj = 0; 71722e3da61SHong Zhang for (j=0;j<anzi;j++) { 71822e3da61SHong Zhang prow = *adj + prstart; 71922e3da61SHong Zhang adj++; 72022e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 72122e3da61SHong Zhang pjj = pj + pi[prow]; 72222e3da61SHong Zhang paj = pa + pi[prow]; 72322e3da61SHong Zhang for (k=0;k<pnzj;k++) { 72422e3da61SHong Zhang if (!apjdense[pjj[k]]) { 72522e3da61SHong Zhang apjdense[pjj[k]] = -1; 72622e3da61SHong Zhang apj[apnzj++] = pjj[k]; 72722e3da61SHong Zhang } 72822e3da61SHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 72922e3da61SHong Zhang } 73022e3da61SHong Zhang flops += 2*pnzj; 73122e3da61SHong Zhang ada++; 73222e3da61SHong Zhang } 73322e3da61SHong Zhang /* off-diagonal portion of A */ 73422e3da61SHong Zhang anzi = aoi[i+1] - aoi[i]; 73522e3da61SHong Zhang for (j=0;j<anzi;j++) { 73622e3da61SHong Zhang col = a->garray[*aoj]; 73722e3da61SHong Zhang if (col < cstart){ 73822e3da61SHong Zhang prow = *aoj; 73922e3da61SHong Zhang } else if (col >= cend){ 74022e3da61SHong Zhang prow = *aoj + prend-prstart; 74122e3da61SHong Zhang } else { 74222e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 74322e3da61SHong Zhang } 74422e3da61SHong Zhang aoj++; 74522e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 74622e3da61SHong Zhang pjj = pj + pi[prow]; 74722e3da61SHong Zhang paj = pa + pi[prow]; 74822e3da61SHong Zhang for (k=0;k<pnzj;k++) { 74922e3da61SHong Zhang if (!apjdense[pjj[k]]) { 75022e3da61SHong Zhang apjdense[pjj[k]] = -1; 75122e3da61SHong Zhang apj[apnzj++] = pjj[k]; 75222e3da61SHong Zhang } 75322e3da61SHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 75422e3da61SHong Zhang } 75522e3da61SHong Zhang flops += 2*pnzj; 75622e3da61SHong Zhang aoa++; 75722e3da61SHong Zhang } 75822e3da61SHong Zhang 75922e3da61SHong Zhang /* Sort the j index array for quick sparse axpy. */ 76022e3da61SHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 76122e3da61SHong Zhang 76222e3da61SHong Zhang /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 76322e3da61SHong Zhang pnzi = pi[i+1+prstart] - pi[i+prstart]; 76422e3da61SHong Zhang for (j=0;j<pnzi;j++) { 76522e3da61SHong Zhang nextap = 0; 76622e3da61SHong Zhang crow = *pJ++; 76722e3da61SHong Zhang cjj = cj + ci[crow]; 76822e3da61SHong Zhang caj = ca + ci[crow]; 76922e3da61SHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 77022e3da61SHong Zhang for (k=0;nextap<apnzj;k++) { 77122e3da61SHong Zhang if (cjj[k]==apj[nextap]) { 77222e3da61SHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 77322e3da61SHong Zhang } 77422e3da61SHong Zhang } 77522e3da61SHong Zhang flops += 2*apnzj; 77622e3da61SHong Zhang pA++; 77722e3da61SHong Zhang } 77822e3da61SHong Zhang 77922e3da61SHong Zhang /* Zero the current row info for A*P */ 78022e3da61SHong Zhang for (j=0;j<apnzj;j++) { 78122e3da61SHong Zhang apa[apj[j]] = 0.; 78222e3da61SHong Zhang apjdense[apj[j]] = 0; 78322e3da61SHong Zhang } 78422e3da61SHong Zhang } 78522e3da61SHong Zhang 78622e3da61SHong Zhang /* Assemble the final matrix and clean up */ 78722e3da61SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78822e3da61SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 78922e3da61SHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 79022e3da61SHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 79122e3da61SHong Zhang 79222e3da61SHong Zhang PetscFunctionReturn(0); 79322e3da61SHong Zhang } 79422e3da61SHong Zhang 79522e3da61SHong Zhang #undef __FUNCT__ 7960390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ" 7970390132cSHong Zhang PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 79822e3da61SHong Zhang { 79922e3da61SHong Zhang PetscErrorCode ierr; 80022e3da61SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 80122e3da61SHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 80222e3da61SHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 80322e3da61SHong Zhang Mat_SeqAIJ *p=(Mat_SeqAIJ*)P->data,*c; 80422e3da61SHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj; 80522e3da61SHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 80622e3da61SHong Zhang PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 80722e3da61SHong Zhang PetscInt an=A->N,am=A->m,pn=P->N,pm=P->M; 80822e3da61SHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 80922e3da61SHong Zhang PetscInt m=prend-prstart,nlnk,*lnk; 81022e3da61SHong Zhang MatScalar *ca; 81122e3da61SHong Zhang PetscBT lnkbt; 81222e3da61SHong Zhang 81322e3da61SHong Zhang PetscFunctionBegin; 814e240928fSHong Zhang int rank; 815e240928fSHong Zhang ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 81622e3da61SHong Zhang 81722e3da61SHong Zhang /* Get ij structure of P[rstart:rend,:]^T */ 818e462e02cSHong Zhang ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr); 81922e3da61SHong Zhang ptJ=ptj; 82022e3da61SHong Zhang 82122e3da61SHong Zhang /* Allocate ci array, arrays for fill computation and */ 82222e3da61SHong Zhang /* free space for accumulating nonzero column info */ 82322e3da61SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 82422e3da61SHong Zhang ci[0] = 0; 82522e3da61SHong Zhang 82622e3da61SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 82722e3da61SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 82822e3da61SHong Zhang ptasparserow = ptadenserow + an; 82922e3da61SHong Zhang 83022e3da61SHong Zhang /* create and initialize a linked list */ 83122e3da61SHong Zhang nlnk = pn+1; 83222e3da61SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 83322e3da61SHong Zhang 83422e3da61SHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 83522e3da61SHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 83622e3da61SHong Zhang nnz = adi[am] + aoi[am]; 83722e3da61SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space); 83822e3da61SHong Zhang current_space = free_space; 83922e3da61SHong Zhang 84022e3da61SHong Zhang /* Determine symbolic info for each row of C: */ 84122e3da61SHong Zhang for (i=0;i<pn;i++) { 84222e3da61SHong Zhang ptnzi = pti[i+1] - pti[i]; 84322e3da61SHong Zhang ptanzi = 0; 84422e3da61SHong Zhang /* Determine symbolic row of PtA_reduced: */ 84522e3da61SHong Zhang for (j=0;j<ptnzi;j++) { 84622e3da61SHong Zhang arow = *ptJ++; 847e240928fSHong Zhang if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); 84822e3da61SHong Zhang /* diagonal portion of A */ 84922e3da61SHong Zhang anzj = adi[arow+1] - adi[arow]; 85022e3da61SHong Zhang ajj = adj + adi[arow]; 85122e3da61SHong Zhang for (k=0;k<anzj;k++) { 85222e3da61SHong Zhang col = ajj[k]+prstart; 85322e3da61SHong Zhang if (!ptadenserow[col]) { 85422e3da61SHong Zhang ptadenserow[col] = -1; 85522e3da61SHong Zhang ptasparserow[ptanzi++] = col; 85622e3da61SHong Zhang } 85722e3da61SHong Zhang } 85822e3da61SHong Zhang /* off-diagonal portion of A */ 85922e3da61SHong Zhang anzj = aoi[arow+1] - aoi[arow]; 86022e3da61SHong Zhang ajj = aoj + aoi[arow]; 86122e3da61SHong Zhang for (k=0;k<anzj;k++) { 86222e3da61SHong Zhang col = a->garray[ajj[k]]; /* global col */ 86322e3da61SHong Zhang if (col < cstart){ 86422e3da61SHong Zhang col = ajj[k]; 86522e3da61SHong Zhang } else if (col >= cend){ 86622e3da61SHong Zhang col = ajj[k] + m; 86722e3da61SHong Zhang } else { 86822e3da61SHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 86922e3da61SHong Zhang } 87022e3da61SHong Zhang if (!ptadenserow[col]) { 87122e3da61SHong Zhang ptadenserow[col] = -1; 87222e3da61SHong Zhang ptasparserow[ptanzi++] = col; 87322e3da61SHong Zhang } 87422e3da61SHong Zhang } 87522e3da61SHong Zhang } 876e240928fSHong Zhang 877e240928fSHong Zhang printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi); 87822e3da61SHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 87922e3da61SHong Zhang ptaj = ptasparserow; 88022e3da61SHong Zhang cnzi = 0; 88122e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 88222e3da61SHong Zhang prow = *ptaj++; 88322e3da61SHong Zhang pnzj = pi[prow+1] - pi[prow]; 88422e3da61SHong Zhang pjj = pj + pi[prow]; 88522e3da61SHong Zhang /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */ 88622e3da61SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 88722e3da61SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 88822e3da61SHong Zhang cnzi += nlnk; 88922e3da61SHong Zhang } 89022e3da61SHong Zhang 89122e3da61SHong Zhang /* If free space is not available, make more free space */ 89222e3da61SHong Zhang /* Double the amount of total space in the list */ 89322e3da61SHong Zhang if (current_space->local_remaining<cnzi) { 89422e3da61SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 89522e3da61SHong Zhang } 89622e3da61SHong Zhang 89722e3da61SHong Zhang /* Copy data into free space, and zero out denserows */ 89822e3da61SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 89922e3da61SHong Zhang current_space->array += cnzi; 90022e3da61SHong Zhang current_space->local_used += cnzi; 90122e3da61SHong Zhang current_space->local_remaining -= cnzi; 90222e3da61SHong Zhang 90322e3da61SHong Zhang for (j=0;j<ptanzi;j++) { 90422e3da61SHong Zhang ptadenserow[ptasparserow[j]] = 0; 90522e3da61SHong Zhang } 90622e3da61SHong Zhang 90722e3da61SHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 90822e3da61SHong Zhang /* For now, we will recompute what is needed. */ 90922e3da61SHong Zhang ci[i+1] = ci[i] + cnzi; 91022e3da61SHong Zhang } 91122e3da61SHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 91222e3da61SHong Zhang /* Allocate space for cj, initialize cj, and */ 91322e3da61SHong Zhang /* destroy list of free space and other temporary array(s) */ 91422e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 91522e3da61SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 91622e3da61SHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 91722e3da61SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 91822e3da61SHong Zhang 91922e3da61SHong Zhang /* Allocate space for ca */ 92022e3da61SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 92122e3da61SHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 92222e3da61SHong Zhang 92322e3da61SHong Zhang /* put together the new matrix */ 92422e3da61SHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 92522e3da61SHong Zhang 92622e3da61SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 92722e3da61SHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 92822e3da61SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 92922e3da61SHong Zhang c->freedata = PETSC_TRUE; 93022e3da61SHong Zhang c->nonew = 0; 93122e3da61SHong Zhang 93222e3da61SHong Zhang /* Clean up. */ 93322e3da61SHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 93422e3da61SHong Zhang 93522e3da61SHong Zhang PetscFunctionReturn(0); 93622e3da61SHong Zhang } 93722e3da61SHong Zhang 938e240928fSHong Zhang /*@C 939e240928fSHong Zhang MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices, 940e240928fSHong Zhang used by MatPtAP_MPIAIJ_MPIAIJ() 941e240928fSHong Zhang 942e240928fSHong Zhang Collective on Mat 943e240928fSHong Zhang 944e240928fSHong Zhang Input Parameters: 945e240928fSHong Zhang + A - the matrix in mpiaij format 946e240928fSHong Zhang . P - the projection matrix in seqaij format 947e240928fSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 948e240928fSHong Zhang . fill - expected fill (not being used when scall==MAT_REUSE_MATRIX) 949e240928fSHong Zhang . prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose 950e240928fSHong Zhang 951e240928fSHong Zhang Output Parameters: 952e240928fSHong Zhang . C - the product matrix in seqaij format 953e240928fSHong Zhang 954e240928fSHong Zhang Level: developer 955e240928fSHong Zhang 956e240928fSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 957e240928fSHong Zhang @*/ 9580390132cSHong Zhang static PetscEvent logkey_matptapreducedpt = 0; 95922e3da61SHong Zhang #undef __FUNCT__ 9600390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ" 9610390132cSHong Zhang PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C) 96222e3da61SHong Zhang { 96322e3da61SHong Zhang PetscErrorCode ierr; 96422e3da61SHong Zhang 96522e3da61SHong Zhang PetscFunctionBegin; 96622e3da61SHong Zhang if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 96722e3da61SHong Zhang if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 9680390132cSHong Zhang if (!logkey_matptapreducedpt) { 9690390132cSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE); 970e462e02cSHong Zhang } 9710390132cSHong Zhang PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 97222e3da61SHong Zhang 97322e3da61SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 9740390132cSHong Zhang ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 97522e3da61SHong Zhang } 9760390132cSHong Zhang ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr); 9770390132cSHong Zhang ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr); 97822e3da61SHong Zhang 97922e3da61SHong Zhang PetscFunctionReturn(0); 98022e3da61SHong Zhang } 98122e3da61SHong Zhang 982e240928fSHong Zhang /* ------------- New --------------------*/ 983e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 984e240928fSHong Zhang #undef __FUNCT__ 985e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 986e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 987e240928fSHong Zhang { 988e240928fSHong Zhang PetscErrorCode ierr; 989e240928fSHong Zhang PetscInt flops=0; 990e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 991e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 992e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 993e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 994e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 995e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 996e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 997e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 998e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 999e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 1000e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 1001e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 1002e240928fSHong Zhang 1003e240928fSHong Zhang PetscFunctionBegin; 1004e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 1005e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1006e240928fSHong Zhang } 1007e240928fSHong Zhang PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1008e240928fSHong Zhang 1009e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 1010e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 1011e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 1012e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 1013e240928fSHong Zhang apjdense = apj + cn; 1014e240928fSHong Zhang 1015e240928fSHong Zhang /* Clear old values in C */ 1016e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 1017e240928fSHong Zhang 1018e240928fSHong Zhang for (i=0;i<am;i++) { 1019e240928fSHong Zhang /* Form i-th sparse row of A*P */ 1020e240928fSHong Zhang apnzj = 0; 1021e240928fSHong Zhang /* diagonal portion of A */ 1022e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 1023e240928fSHong Zhang for (j=0;j<anzi;j++) { 1024e240928fSHong Zhang prow = *adj; 1025e240928fSHong Zhang adj++; 1026e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1027e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1028e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 1029e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1030e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1031e240928fSHong Zhang apjdense[pjj[k]] = -1; 1032e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1033e240928fSHong Zhang } 1034e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 1035e240928fSHong Zhang } 1036e240928fSHong Zhang flops += 2*pnzj; 1037e240928fSHong Zhang ada++; 1038e240928fSHong Zhang } 1039e240928fSHong Zhang /* off-diagonal portion of A */ 1040e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 1041e240928fSHong Zhang for (j=0;j<anzi;j++) { 1042e240928fSHong Zhang col = a->garray[*aoj]; 1043e240928fSHong Zhang if (col < cstart){ 1044e240928fSHong Zhang prow = *aoj; 1045e240928fSHong Zhang } else if (col >= cend){ 1046e240928fSHong Zhang prow = *aoj; 1047e240928fSHong Zhang } else { 1048e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1049e240928fSHong Zhang } 1050e240928fSHong Zhang aoj++; 1051e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1052e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1053e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 1054e240928fSHong Zhang for (k=0;k<pnzj;k++) { 1055e240928fSHong Zhang if (!apjdense[pjj[k]]) { 1056e240928fSHong Zhang apjdense[pjj[k]] = -1; 1057e240928fSHong Zhang apj[apnzj++] = pjj[k]; 1058e240928fSHong Zhang } 1059e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 1060e240928fSHong Zhang } 1061e240928fSHong Zhang flops += 2*pnzj; 1062e240928fSHong Zhang aoa++; 1063e240928fSHong Zhang } 1064e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 1065e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 1066e240928fSHong Zhang 1067e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 1068e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 1069e240928fSHong Zhang for (j=0;j<pnzi;j++) { 1070e240928fSHong Zhang nextap = 0; 1071e240928fSHong Zhang crow = *pJ++; 1072e240928fSHong Zhang cjj = cj + ci[crow]; 1073e240928fSHong Zhang caj = ca + ci[crow]; 1074e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 1075e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 1076e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 1077e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 1078e240928fSHong Zhang } 1079e240928fSHong Zhang } 1080e240928fSHong Zhang flops += 2*apnzj; 1081e240928fSHong Zhang pA++; 1082e240928fSHong Zhang } 1083e240928fSHong Zhang 1084e240928fSHong Zhang /* Zero the current row info for A*P */ 1085e240928fSHong Zhang for (j=0;j<apnzj;j++) { 1086e240928fSHong Zhang apa[apj[j]] = 0.; 1087e240928fSHong Zhang apjdense[apj[j]] = 0; 1088e240928fSHong Zhang } 1089e240928fSHong Zhang } 1090e240928fSHong Zhang 1091e240928fSHong Zhang /* Assemble the final matrix and clean up */ 1092e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1093e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1094e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 1095e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 1096e240928fSHong Zhang PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1097e240928fSHong Zhang PetscFunctionReturn(0); 1098e240928fSHong Zhang } 1099e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 1100e240928fSHong Zhang #undef __FUNCT__ 1101e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 1102e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 1103e240928fSHong Zhang { 1104e240928fSHong Zhang PetscErrorCode ierr; 1105e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1106e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 1107e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 1108e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 1109e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 1110e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 1111e240928fSHong Zhang PetscInt an=A->N,am=A->m,pN=P_loc->N; 1112e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 1113e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 1114e240928fSHong Zhang MatScalar *ca; 1115e240928fSHong Zhang PetscBT lnkbt; 1116e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 1117e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 1118e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 1119e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 1120e240928fSHong Zhang /* PetscInt rank; */ 1121e240928fSHong Zhang 1122e240928fSHong Zhang PetscFunctionBegin; 1123e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 1124e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 1125e240928fSHong Zhang } 1126e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1127e240928fSHong Zhang 1128e240928fSHong Zhang /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */ 1129e240928fSHong Zhang prend = prstart + pm; 1130e240928fSHong Zhang 1131e240928fSHong Zhang /* get ij structure of P_loc^T */ 1132e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1133e240928fSHong Zhang ptJ=ptj; 1134e240928fSHong Zhang 1135e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 1136e240928fSHong Zhang /* free space for accumulating nonzero column info */ 1137e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 1138e240928fSHong Zhang ci[0] = 0; 1139e240928fSHong Zhang 1140e240928fSHong Zhang ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 1141e240928fSHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 1142e240928fSHong Zhang ptasparserow_loc = ptadenserow_loc + an; 1143e240928fSHong Zhang ptadenserow_oth = ptasparserow_loc + an; 1144e240928fSHong Zhang ptasparserow_oth = ptadenserow_oth + an; 1145e240928fSHong Zhang 1146e240928fSHong Zhang /* create and initialize a linked list */ 1147e240928fSHong Zhang nlnk = pN+1; 1148e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1149e240928fSHong Zhang 1150e240928fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 1151e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 1152e240928fSHong Zhang nnz = adi[am] + aoi[am]; 1153e240928fSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 1154e240928fSHong Zhang current_space = free_space; 1155e240928fSHong Zhang 1156e240928fSHong Zhang /* determine symbolic info for each row of C: */ 1157e240928fSHong Zhang for (i=0;i<pN;i++) { 1158e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 1159e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 1160e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 1161e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 1162e240928fSHong Zhang arow = *ptJ++; 1163e240928fSHong Zhang /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */ 1164e240928fSHong Zhang /* diagonal portion of A */ 1165e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 1166e240928fSHong Zhang ajj = adj + adi[arow]; 1167e240928fSHong Zhang for (k=0;k<anzj;k++) { 1168e240928fSHong Zhang col = ajj[k]+prstart; 1169e240928fSHong Zhang if (!ptadenserow_loc[col]) { 1170e240928fSHong Zhang ptadenserow_loc[col] = -1; 1171e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 1172e240928fSHong Zhang } 1173e240928fSHong Zhang } 1174e240928fSHong Zhang /* off-diagonal portion of A */ 1175e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 1176e240928fSHong Zhang ajj = aoj + aoi[arow]; 1177e240928fSHong Zhang for (k=0;k<anzj;k++) { 1178e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 1179e240928fSHong Zhang if (col < cstart){ 1180e240928fSHong Zhang col = ajj[k]; 1181e240928fSHong Zhang } else if (col >= cend){ 1182e240928fSHong Zhang col = ajj[k] + pm; 1183e240928fSHong Zhang } else { 1184e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 1185e240928fSHong Zhang } 1186e240928fSHong Zhang if (!ptadenserow_oth[col]) { 1187e240928fSHong Zhang ptadenserow_oth[col] = -1; 1188e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 1189e240928fSHong Zhang } 1190e240928fSHong Zhang } 1191e240928fSHong Zhang } 1192e240928fSHong Zhang 1193e240928fSHong Zhang /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */ 1194e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 1195e240928fSHong Zhang cnzi = 0; 1196e240928fSHong Zhang /* rows of P_loc */ 1197e240928fSHong Zhang ptaj = ptasparserow_loc; 1198e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 1199e240928fSHong Zhang prow = *ptaj++; 1200e240928fSHong Zhang prow -= prstart; /* rm */ 1201e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 1202e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 1203e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1204e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1205e240928fSHong Zhang cnzi += nlnk; 1206e240928fSHong Zhang } 1207e240928fSHong Zhang /* rows of P_oth */ 1208e240928fSHong Zhang ptaj = ptasparserow_oth; 1209e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 1210e240928fSHong Zhang prow = *ptaj++; 1211e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 1212e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 1213e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 1214e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 1215e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1216e240928fSHong Zhang cnzi += nlnk; 1217e240928fSHong Zhang } 1218e240928fSHong Zhang 1219e240928fSHong Zhang /* If free space is not available, make more free space */ 1220e240928fSHong Zhang /* Double the amount of total space in the list */ 1221e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 1222e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 1223e240928fSHong Zhang } 1224e240928fSHong Zhang 1225e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 1226e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1227e240928fSHong Zhang current_space->array += cnzi; 1228e240928fSHong Zhang current_space->local_used += cnzi; 1229e240928fSHong Zhang current_space->local_remaining -= cnzi; 1230e240928fSHong Zhang 1231e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 1232e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 1233e240928fSHong Zhang } 1234e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 1235e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 1236e240928fSHong Zhang } 1237e240928fSHong Zhang 1238e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 1239e240928fSHong Zhang /* For now, we will recompute what is needed. */ 1240e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 1241e240928fSHong Zhang } 1242e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 1243e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 1244e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 1245e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 1246e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 1247e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 1248e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1249e240928fSHong Zhang 1250e240928fSHong Zhang /* Allocate space for ca */ 1251e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 1252e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 1253e240928fSHong Zhang 1254e240928fSHong Zhang /* put together the new matrix */ 1255e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 1256e240928fSHong Zhang 1257e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 1258e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 1259e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 1260e240928fSHong Zhang c->freedata = PETSC_TRUE; 1261e240928fSHong Zhang c->nonew = 0; 1262e240928fSHong Zhang 1263e240928fSHong Zhang /* Clean up. */ 1264e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1265e240928fSHong Zhang /* 1266e240928fSHong Zhang ierr = MPI_Barrier(PETSC_COMM_WORLD); 1267e240928fSHong Zhang if (rank == 1){ 1268e240928fSHong Zhang printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N); 1269e240928fSHong Zhang ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 1270e240928fSHong Zhang } */ 1271e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1272e240928fSHong Zhang PetscFunctionReturn(0); 1273e240928fSHong Zhang } 1274