1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78a61c8c0fSHong Zhang #undef __FUNCT__ 79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81a61c8c0fSHong Zhang { 82a61c8c0fSHong Zhang PetscErrorCode ierr; 83*32fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 84a61c8c0fSHong Zhang PetscObjectContainer container; 85a61c8c0fSHong Zhang 86a61c8c0fSHong Zhang PetscFunctionBegin; 87a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88a61c8c0fSHong Zhang if (container) { 89a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90*32fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 91*32fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 92a61c8c0fSHong Zhang 93a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 94*32fba14fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 95a61c8c0fSHong Zhang } 96a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 97a61c8c0fSHong Zhang 98a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 99a61c8c0fSHong Zhang PetscFunctionReturn(0); 100a61c8c0fSHong Zhang } 101a61c8c0fSHong Zhang 102eb9c0419SKris Buschelman #undef __FUNCT__ 103ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 104ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 105ff134f7aSHong Zhang { 106ff134f7aSHong Zhang PetscErrorCode ierr; 107*32fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 108a61c8c0fSHong Zhang PetscObjectContainer container; 109b90dcfe3SHong Zhang 110b90dcfe3SHong Zhang PetscFunctionBegin; 111b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1124c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 113*32fba14fSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 114*32fba14fSHong Zhang 115e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 116*32fba14fSHong Zhang ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 117a61c8c0fSHong Zhang 118e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 119*32fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 120e240928fSHong Zhang 121a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 122a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 123a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 124a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 125a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 126e240928fSHong Zhang 127*32fba14fSHong Zhang /* now, compute symbolic local P^T*A*P */ 128a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 129a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 130a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 131a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 132a61c8c0fSHong Zhang if (container) { 133a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 134a61c8c0fSHong Zhang } else { 135a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 136a61c8c0fSHong Zhang } 137a61c8c0fSHong Zhang 138a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 139a61c8c0fSHong Zhang /* improve! */ 140*32fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 141*32fba14fSHong Zhang ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142a61c8c0fSHong Zhang 143a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 144*32fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); /* improve! */ 145*32fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 146*32fba14fSHong Zhang 147a61c8c0fSHong Zhang } else { 148a61c8c0fSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 149a61c8c0fSHong Zhang } 150*32fba14fSHong Zhang /* now, compute numeric local P^T*A*P */ 151a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 152a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 153a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 154a61c8c0fSHong Zhang 155a61c8c0fSHong Zhang PetscFunctionReturn(0); 156a61c8c0fSHong Zhang } 157a61c8c0fSHong Zhang 158a61c8c0fSHong Zhang #undef __FUNCT__ 159a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 160a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 161a61c8c0fSHong Zhang { 162a61c8c0fSHong Zhang PetscErrorCode ierr; 163a61c8c0fSHong Zhang Mat C_seq; 164*32fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 165a61c8c0fSHong Zhang PetscObjectContainer container; 166a61c8c0fSHong Zhang 167a61c8c0fSHong Zhang PetscFunctionBegin; 168a61c8c0fSHong Zhang 169a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 170a61c8c0fSHong Zhang if (container) { 171a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 172a61c8c0fSHong Zhang } else { 173a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 174a61c8c0fSHong Zhang } 17525616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 176*32fba14fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 177a61c8c0fSHong Zhang 178b90dcfe3SHong Zhang /* add C_seq into mpi C */ 179a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 180b90dcfe3SHong Zhang 181ff134f7aSHong Zhang PetscFunctionReturn(0); 182ff134f7aSHong Zhang } 183ff134f7aSHong Zhang 184ff134f7aSHong Zhang #undef __FUNCT__ 185e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 186b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 187ff134f7aSHong Zhang { 188b90dcfe3SHong Zhang PetscErrorCode ierr; 189671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 190*32fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 191a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 192ff134f7aSHong Zhang 193ff134f7aSHong Zhang PetscFunctionBegin; 194a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 195a61c8c0fSHong Zhang if (cont_merge) { 196a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 1977f79fd58SMatthew Knepley } else { 198a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 199671beff6SHong Zhang } 200a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 201a61c8c0fSHong Zhang if (cont_ptap) { 202a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 203a61c8c0fSHong Zhang } else { 204a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 205a61c8c0fSHong Zhang } 206*32fba14fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 207a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 208ff134f7aSHong Zhang PetscFunctionReturn(0); 209ff134f7aSHong Zhang } 210ff134f7aSHong Zhang 211ff134f7aSHong Zhang #undef __FUNCT__ 2129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 213dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 2149af31e4aSHong Zhang { 215dfbe8321SBarry Smith PetscErrorCode ierr; 216b1d57f15SBarry Smith 2179af31e4aSHong Zhang PetscFunctionBegin; 2189af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 219d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2209af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 221d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2229af31e4aSHong Zhang } 223d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2249af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 225d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2269af31e4aSHong Zhang PetscFunctionReturn(0); 2279af31e4aSHong Zhang } 2289af31e4aSHong Zhang 2296849ba73SBarry Smith /* 2309af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2314d3841fdSKris Buschelman 2324d3841fdSKris Buschelman Collective on Mat 2334d3841fdSKris Buschelman 2344d3841fdSKris Buschelman Input Parameters: 2354d3841fdSKris Buschelman + A - the matrix 2364d3841fdSKris Buschelman - P - the projection matrix 2374d3841fdSKris Buschelman 2384d3841fdSKris Buschelman Output Parameters: 2394d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2404d3841fdSKris Buschelman 2414d3841fdSKris Buschelman Notes: 2424d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2434d3841fdSKris Buschelman 2444d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2454d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2469af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2474d3841fdSKris Buschelman 2484d3841fdSKris Buschelman Level: intermediate 2494d3841fdSKris Buschelman 2509af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2516849ba73SBarry Smith */ 252e240928fSHong Zhang #undef __FUNCT__ 253e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 25455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 25555a3bba9SHong Zhang { 256dfbe8321SBarry Smith PetscErrorCode ierr; 257534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 258534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 259eb9c0419SKris Buschelman 260eb9c0419SKris Buschelman PetscFunctionBegin; 261eb9c0419SKris Buschelman 2624482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 263c9780b6fSBarry Smith PetscValidType(A,1); 264eb9c0419SKris Buschelman MatPreallocated(A); 265eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 266eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 267eb9c0419SKris Buschelman 2684482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 269c9780b6fSBarry Smith PetscValidType(P,2); 270eb9c0419SKris Buschelman MatPreallocated(P); 271eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 272eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 273eb9c0419SKris Buschelman 2744482741eSBarry Smith PetscValidPointer(C,3); 2754482741eSBarry Smith 276eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 277eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 278eb9c0419SKris Buschelman 279534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 280534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 281534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 282534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 283534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 284534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 285534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2864d3841fdSKris Buschelman 287534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 288534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 289534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 290eb9c0419SKris Buschelman 291eb9c0419SKris Buschelman PetscFunctionReturn(0); 292eb9c0419SKris Buschelman } 293eb9c0419SKris Buschelman 294f747e1aeSHong Zhang typedef struct { 295f747e1aeSHong Zhang Mat symAP; 296f747e1aeSHong Zhang } Mat_PtAPstruct; 297f747e1aeSHong Zhang 29878a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 29978a80504SBarry Smith 300f747e1aeSHong Zhang #undef __FUNCT__ 301f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 302f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 303f747e1aeSHong Zhang { 304f4a850bbSBarry Smith PetscErrorCode ierr; 305f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 306f747e1aeSHong Zhang 307f747e1aeSHong Zhang PetscFunctionBegin; 308f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 309f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 31078a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 311f747e1aeSHong Zhang PetscFunctionReturn(0); 312f747e1aeSHong Zhang } 313f747e1aeSHong Zhang 314eb9c0419SKris Buschelman #undef __FUNCT__ 3159af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 31655a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 31755a3bba9SHong Zhang { 318dfbe8321SBarry Smith PetscErrorCode ierr; 319d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 320d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 32155a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 322b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 32355a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 324b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 325d20bfe6fSHong Zhang MatScalar *ca; 32655a3bba9SHong Zhang PetscBT lnkbt; 327eb9c0419SKris Buschelman 328eb9c0419SKris Buschelman PetscFunctionBegin; 329d20bfe6fSHong Zhang /* Get ij structure of P^T */ 330eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 331d20bfe6fSHong Zhang ptJ=ptj; 332eb9c0419SKris Buschelman 333d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 334d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 33555a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 336d20bfe6fSHong Zhang ci[0] = 0; 337eb9c0419SKris Buschelman 33855a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 33955a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 340d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 34155a3bba9SHong Zhang 34255a3bba9SHong Zhang /* create and initialize a linked list */ 34355a3bba9SHong Zhang nlnk = pn+1; 34455a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 345eb9c0419SKris Buschelman 346d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 347d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 348d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 349d20bfe6fSHong Zhang current_space = free_space; 350d20bfe6fSHong Zhang 351d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 352d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 353d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 354d20bfe6fSHong Zhang ptanzi = 0; 355d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 356d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 357d20bfe6fSHong Zhang arow = *ptJ++; 358d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 359d20bfe6fSHong Zhang ajj = aj + ai[arow]; 360d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 361d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 362d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 363d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 364d20bfe6fSHong Zhang } 365d20bfe6fSHong Zhang } 366d20bfe6fSHong Zhang } 367d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 368d20bfe6fSHong Zhang ptaj = ptasparserow; 369d20bfe6fSHong Zhang cnzi = 0; 370d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 371d20bfe6fSHong Zhang prow = *ptaj++; 372d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 373d20bfe6fSHong Zhang pjj = pj + pi[prow]; 37455a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 37555a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 37655a3bba9SHong Zhang cnzi += nlnk; 377d20bfe6fSHong Zhang } 378d20bfe6fSHong Zhang 379d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 380d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 381d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 382d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 383d20bfe6fSHong Zhang } 384d20bfe6fSHong Zhang 385d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 38655a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 387d20bfe6fSHong Zhang current_space->array += cnzi; 388d20bfe6fSHong Zhang current_space->local_used += cnzi; 389d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 390d20bfe6fSHong Zhang 391d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 392d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 393d20bfe6fSHong Zhang } 394d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 395d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 396d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 397d20bfe6fSHong Zhang } 398d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 399d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 400d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 40155a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 402d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 403d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 40455a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 405d20bfe6fSHong Zhang 406d20bfe6fSHong Zhang /* Allocate space for ca */ 407d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 408d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 409d20bfe6fSHong Zhang 410d20bfe6fSHong Zhang /* put together the new matrix */ 411d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 412d20bfe6fSHong Zhang 413d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 414d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 415d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 416d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 417d20bfe6fSHong Zhang c->nonew = 0; 418d20bfe6fSHong Zhang 419d20bfe6fSHong Zhang /* Clean up. */ 420d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 421eb9c0419SKris Buschelman 422eb9c0419SKris Buschelman PetscFunctionReturn(0); 423eb9c0419SKris Buschelman } 424eb9c0419SKris Buschelman 4253985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 4263985e5eaSKris Buschelman EXTERN_C_BEGIN 4273985e5eaSKris Buschelman #undef __FUNCT__ 4289af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 42955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 43055a3bba9SHong Zhang { 4315c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 432dfbe8321SBarry Smith PetscErrorCode ierr; 4333985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4343985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4353985e5eaSKris Buschelman Mat P=pp->AIJ; 4363985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 437b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 438b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 439b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 440b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4413985e5eaSKris Buschelman MatScalar *ca; 4423985e5eaSKris Buschelman 4433985e5eaSKris Buschelman PetscFunctionBegin; 4443985e5eaSKris Buschelman /* Start timer */ 4459af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4463985e5eaSKris Buschelman 4473985e5eaSKris Buschelman /* Get ij structure of P^T */ 4483985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4493985e5eaSKris Buschelman 4503985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4513985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 452b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4533985e5eaSKris Buschelman ci[0] = 0; 4543985e5eaSKris Buschelman 455b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 456b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4573985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4583985e5eaSKris Buschelman denserow = ptasparserow + an; 4593985e5eaSKris Buschelman sparserow = denserow + pn; 4603985e5eaSKris Buschelman 4613985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4623985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4633985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4643985e5eaSKris Buschelman current_space = free_space; 4653985e5eaSKris Buschelman 4663985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4673985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4683985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4693985e5eaSKris Buschelman ptanzi = 0; 4703985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4713985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4723985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4733985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4743985e5eaSKris Buschelman arow = ptJ[j] + dof; 4753985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4763985e5eaSKris Buschelman ajj = aj + ai[arow]; 4773985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4783985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4793985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4803985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4813985e5eaSKris Buschelman } 4823985e5eaSKris Buschelman } 4833985e5eaSKris Buschelman } 4843985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4853985e5eaSKris Buschelman ptaj = ptasparserow; 4863985e5eaSKris Buschelman cnzi = 0; 4873985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 488fe05a634SKris Buschelman pdof = *ptaj%dof; 4893985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4903985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4913985e5eaSKris Buschelman pjj = pj + pi[prow]; 4923985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 493fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 494fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 495fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4963985e5eaSKris Buschelman } 4973985e5eaSKris Buschelman } 4983985e5eaSKris Buschelman } 4993985e5eaSKris Buschelman 5003985e5eaSKris Buschelman /* sort sparserow */ 5013985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 5023985e5eaSKris Buschelman 5033985e5eaSKris Buschelman /* If free space is not available, make more free space */ 5043985e5eaSKris Buschelman /* Double the amount of total space in the list */ 5053985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 5063985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5073985e5eaSKris Buschelman } 5083985e5eaSKris Buschelman 5093985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 510b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 5113985e5eaSKris Buschelman current_space->array += cnzi; 5123985e5eaSKris Buschelman current_space->local_used += cnzi; 5133985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 5143985e5eaSKris Buschelman 5153985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 5163985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 5173985e5eaSKris Buschelman } 5183985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 5193985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 5203985e5eaSKris Buschelman } 5213985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 5223985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 5233985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 5243985e5eaSKris Buschelman } 5253985e5eaSKris Buschelman } 5263985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 5273985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 5283985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 529b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5303985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5313985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5323985e5eaSKris Buschelman 5333985e5eaSKris Buschelman /* Allocate space for ca */ 5343985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5353985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5363985e5eaSKris Buschelman 5373985e5eaSKris Buschelman /* put together the new matrix */ 5383985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5393985e5eaSKris Buschelman 5403985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5413985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5423985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5433985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5443985e5eaSKris Buschelman c->nonew = 0; 5453985e5eaSKris Buschelman 5463985e5eaSKris Buschelman /* Clean up. */ 5473985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5483985e5eaSKris Buschelman 5499af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5503985e5eaSKris Buschelman PetscFunctionReturn(0); 5513985e5eaSKris Buschelman } 5523985e5eaSKris Buschelman EXTERN_C_END 5533985e5eaSKris Buschelman 5546849ba73SBarry Smith /* 5559af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5564d3841fdSKris Buschelman 5574d3841fdSKris Buschelman Collective on Mat 5584d3841fdSKris Buschelman 5594d3841fdSKris Buschelman Input Parameters: 5604d3841fdSKris Buschelman + A - the matrix 5614d3841fdSKris Buschelman - P - the projection matrix 5624d3841fdSKris Buschelman 5634d3841fdSKris Buschelman Output Parameters: 5644d3841fdSKris Buschelman . C - the product matrix 5654d3841fdSKris Buschelman 5664d3841fdSKris Buschelman Notes: 5679af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5684d3841fdSKris Buschelman the user using MatDeatroy(). 5694d3841fdSKris Buschelman 570170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 571170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5724d3841fdSKris Buschelman 5734d3841fdSKris Buschelman Level: intermediate 5744d3841fdSKris Buschelman 5759af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5766849ba73SBarry Smith */ 577e240928fSHong Zhang #undef __FUNCT__ 578e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 57955a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 58055a3bba9SHong Zhang { 581dfbe8321SBarry Smith PetscErrorCode ierr; 582534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 583534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 584eb9c0419SKris Buschelman 585eb9c0419SKris Buschelman PetscFunctionBegin; 586eb9c0419SKris Buschelman 5874482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 588c9780b6fSBarry Smith PetscValidType(A,1); 589eb9c0419SKris Buschelman MatPreallocated(A); 590eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 591eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 592eb9c0419SKris Buschelman 5934482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 594c9780b6fSBarry Smith PetscValidType(P,2); 595eb9c0419SKris Buschelman MatPreallocated(P); 596eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 597eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 598eb9c0419SKris Buschelman 5994482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 600c9780b6fSBarry Smith PetscValidType(C,3); 601eb9c0419SKris Buschelman MatPreallocated(C); 602eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 603eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 604eb9c0419SKris Buschelman 605eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 606eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 607eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 608eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 609eb9c0419SKris Buschelman 610534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 611534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 612534c1384SKris Buschelman fA = A->ops->ptapnumeric; 613534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 614534c1384SKris Buschelman fP = P->ops->ptapnumeric; 615534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 616534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 6174d3841fdSKris Buschelman 618534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 619534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 620534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 621eb9c0419SKris Buschelman 622eb9c0419SKris Buschelman PetscFunctionReturn(0); 623eb9c0419SKris Buschelman } 624eb9c0419SKris Buschelman 625eb9c0419SKris Buschelman #undef __FUNCT__ 6269af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 627dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 628dfbe8321SBarry Smith { 629dfbe8321SBarry Smith PetscErrorCode ierr; 630b1d57f15SBarry Smith PetscInt flops=0; 631d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 632d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 633d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 634b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 635b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 636b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 637b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 638d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 639eb9c0419SKris Buschelman 640eb9c0419SKris Buschelman PetscFunctionBegin; 641d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 642b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 643b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 644eb9c0419SKris Buschelman 645b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 646d20bfe6fSHong Zhang apjdense = apj + cn; 647d20bfe6fSHong Zhang 648d20bfe6fSHong Zhang /* Clear old values in C */ 649d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 650d20bfe6fSHong Zhang 651d20bfe6fSHong Zhang for (i=0;i<am;i++) { 652d20bfe6fSHong Zhang /* Form sparse row of A*P */ 653d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 654d20bfe6fSHong Zhang apnzj = 0; 655d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 656d20bfe6fSHong Zhang prow = *aj++; 657d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 658d20bfe6fSHong Zhang pjj = pj + pi[prow]; 659d20bfe6fSHong Zhang paj = pa + pi[prow]; 660d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 661d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 662d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 663d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 664d20bfe6fSHong Zhang } 665d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 666d20bfe6fSHong Zhang } 667d20bfe6fSHong Zhang flops += 2*pnzj; 668d20bfe6fSHong Zhang aa++; 669d20bfe6fSHong Zhang } 670d20bfe6fSHong Zhang 671d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 672d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 673d20bfe6fSHong Zhang 674d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 675d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 676d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 677d20bfe6fSHong Zhang nextap = 0; 678d20bfe6fSHong Zhang crow = *pJ++; 679d20bfe6fSHong Zhang cjj = cj + ci[crow]; 680d20bfe6fSHong Zhang caj = ca + ci[crow]; 681d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 682d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 683d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 684d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 685d20bfe6fSHong Zhang } 686d20bfe6fSHong Zhang } 687d20bfe6fSHong Zhang flops += 2*apnzj; 688d20bfe6fSHong Zhang pA++; 689d20bfe6fSHong Zhang } 690d20bfe6fSHong Zhang 691d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 692d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 693d20bfe6fSHong Zhang apa[apj[j]] = 0.; 694d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 695d20bfe6fSHong Zhang } 696d20bfe6fSHong Zhang } 697d20bfe6fSHong Zhang 698d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 699d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 700d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 701d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 702d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 703d20bfe6fSHong Zhang 704eb9c0419SKris Buschelman PetscFunctionReturn(0); 705eb9c0419SKris Buschelman } 7060e36024fSHong Zhang 707a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 708e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 709e240928fSHong Zhang #undef __FUNCT__ 710e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 711e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 712e240928fSHong Zhang { 713e240928fSHong Zhang PetscErrorCode ierr; 714e240928fSHong Zhang PetscInt flops=0; 715e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 716e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 717e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 718e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 719e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 720e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 721e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 722e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 723e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 724e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 725e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 726e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 727e240928fSHong Zhang 728e240928fSHong Zhang PetscFunctionBegin; 729e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 730a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 731e240928fSHong Zhang } 732a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 733e240928fSHong Zhang 734e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 735e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 736e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 737e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 738e240928fSHong Zhang apjdense = apj + cn; 739e240928fSHong Zhang 740e240928fSHong Zhang /* Clear old values in C */ 741e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 742e240928fSHong Zhang 743e240928fSHong Zhang for (i=0;i<am;i++) { 744e240928fSHong Zhang /* Form i-th sparse row of A*P */ 745e240928fSHong Zhang apnzj = 0; 746e240928fSHong Zhang /* diagonal portion of A */ 747e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 748e240928fSHong Zhang for (j=0;j<anzi;j++) { 749e240928fSHong Zhang prow = *adj; 750e240928fSHong Zhang adj++; 751e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 752e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 753e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 754e240928fSHong Zhang for (k=0;k<pnzj;k++) { 755e240928fSHong Zhang if (!apjdense[pjj[k]]) { 756e240928fSHong Zhang apjdense[pjj[k]] = -1; 757e240928fSHong Zhang apj[apnzj++] = pjj[k]; 758e240928fSHong Zhang } 759e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 760e240928fSHong Zhang } 761e240928fSHong Zhang flops += 2*pnzj; 762e240928fSHong Zhang ada++; 763e240928fSHong Zhang } 764e240928fSHong Zhang /* off-diagonal portion of A */ 765e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 766e240928fSHong Zhang for (j=0;j<anzi;j++) { 767e240928fSHong Zhang col = a->garray[*aoj]; 768e240928fSHong Zhang if (col < cstart){ 769e240928fSHong Zhang prow = *aoj; 770e240928fSHong Zhang } else if (col >= cend){ 771e240928fSHong Zhang prow = *aoj; 772e240928fSHong Zhang } else { 773e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 774e240928fSHong Zhang } 775e240928fSHong Zhang aoj++; 776e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 777e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 778e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 779e240928fSHong Zhang for (k=0;k<pnzj;k++) { 780e240928fSHong Zhang if (!apjdense[pjj[k]]) { 781e240928fSHong Zhang apjdense[pjj[k]] = -1; 782e240928fSHong Zhang apj[apnzj++] = pjj[k]; 783e240928fSHong Zhang } 784e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 785e240928fSHong Zhang } 786e240928fSHong Zhang flops += 2*pnzj; 787e240928fSHong Zhang aoa++; 788e240928fSHong Zhang } 789e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 790e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 791e240928fSHong Zhang 792e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 793e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 794e240928fSHong Zhang for (j=0;j<pnzi;j++) { 795e240928fSHong Zhang nextap = 0; 796e240928fSHong Zhang crow = *pJ++; 797e240928fSHong Zhang cjj = cj + ci[crow]; 798e240928fSHong Zhang caj = ca + ci[crow]; 799e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 800e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 801e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 802e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 803e240928fSHong Zhang } 804e240928fSHong Zhang } 805e240928fSHong Zhang flops += 2*apnzj; 806e240928fSHong Zhang pA++; 807e240928fSHong Zhang } 808e240928fSHong Zhang 809e240928fSHong Zhang /* Zero the current row info for A*P */ 810e240928fSHong Zhang for (j=0;j<apnzj;j++) { 811e240928fSHong Zhang apa[apj[j]] = 0.; 812e240928fSHong Zhang apjdense[apj[j]] = 0; 813e240928fSHong Zhang } 814e240928fSHong Zhang } 815e240928fSHong Zhang 816e240928fSHong Zhang /* Assemble the final matrix and clean up */ 817e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 820e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 821a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 822e240928fSHong Zhang PetscFunctionReturn(0); 823e240928fSHong Zhang } 824e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 825e240928fSHong Zhang #undef __FUNCT__ 826e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 827e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 828e240928fSHong Zhang { 829e240928fSHong Zhang PetscErrorCode ierr; 830e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 831e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 832e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 833e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 834e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 835e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 836e240928fSHong Zhang PetscInt an=A->N,am=A->m,pN=P_loc->N; 837e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 838e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 839e240928fSHong Zhang MatScalar *ca; 840e240928fSHong Zhang PetscBT lnkbt; 841e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 842e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 843e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 844e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 845e240928fSHong Zhang 846e240928fSHong Zhang PetscFunctionBegin; 847e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 848e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 849e240928fSHong Zhang } 850e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 851e240928fSHong Zhang 852e240928fSHong Zhang prend = prstart + pm; 853e240928fSHong Zhang 854e240928fSHong Zhang /* get ij structure of P_loc^T */ 855e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 856e240928fSHong Zhang ptJ=ptj; 857e240928fSHong Zhang 858e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 859e240928fSHong Zhang /* free space for accumulating nonzero column info */ 860e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 861e240928fSHong Zhang ci[0] = 0; 862e240928fSHong Zhang 863e240928fSHong Zhang ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 864e240928fSHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 865e240928fSHong Zhang ptasparserow_loc = ptadenserow_loc + an; 866e240928fSHong Zhang ptadenserow_oth = ptasparserow_loc + an; 867e240928fSHong Zhang ptasparserow_oth = ptadenserow_oth + an; 868e240928fSHong Zhang 869e240928fSHong Zhang /* create and initialize a linked list */ 870e240928fSHong Zhang nlnk = pN+1; 871e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 872e240928fSHong Zhang 873e240928fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 874e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 875e240928fSHong Zhang nnz = adi[am] + aoi[am]; 876e240928fSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 877e240928fSHong Zhang current_space = free_space; 878e240928fSHong Zhang 879e240928fSHong Zhang /* determine symbolic info for each row of C: */ 880e240928fSHong Zhang for (i=0;i<pN;i++) { 881e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 882e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 883e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 884e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 885e240928fSHong Zhang arow = *ptJ++; 886e240928fSHong Zhang /* diagonal portion of A */ 887e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 888e240928fSHong Zhang ajj = adj + adi[arow]; 889e240928fSHong Zhang for (k=0;k<anzj;k++) { 890e240928fSHong Zhang col = ajj[k]+prstart; 891e240928fSHong Zhang if (!ptadenserow_loc[col]) { 892e240928fSHong Zhang ptadenserow_loc[col] = -1; 893e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 894e240928fSHong Zhang } 895e240928fSHong Zhang } 896e240928fSHong Zhang /* off-diagonal portion of A */ 897e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 898e240928fSHong Zhang ajj = aoj + aoi[arow]; 899e240928fSHong Zhang for (k=0;k<anzj;k++) { 900e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 901e240928fSHong Zhang if (col < cstart){ 902e240928fSHong Zhang col = ajj[k]; 903e240928fSHong Zhang } else if (col >= cend){ 904e240928fSHong Zhang col = ajj[k] + pm; 905e240928fSHong Zhang } else { 906e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 907e240928fSHong Zhang } 908e240928fSHong Zhang if (!ptadenserow_oth[col]) { 909e240928fSHong Zhang ptadenserow_oth[col] = -1; 910e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 911e240928fSHong Zhang } 912e240928fSHong Zhang } 913e240928fSHong Zhang } 914e240928fSHong Zhang 915e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 916e240928fSHong Zhang cnzi = 0; 917e240928fSHong Zhang /* rows of P_loc */ 918e240928fSHong Zhang ptaj = ptasparserow_loc; 919e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 920e240928fSHong Zhang prow = *ptaj++; 921e240928fSHong Zhang prow -= prstart; /* rm */ 922e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 923e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 924e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 925e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 926e240928fSHong Zhang cnzi += nlnk; 927e240928fSHong Zhang } 928e240928fSHong Zhang /* rows of P_oth */ 929e240928fSHong Zhang ptaj = ptasparserow_oth; 930e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 931e240928fSHong Zhang prow = *ptaj++; 932e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 933e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 934e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 935e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 936e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 937e240928fSHong Zhang cnzi += nlnk; 938e240928fSHong Zhang } 939e240928fSHong Zhang 940e240928fSHong Zhang /* If free space is not available, make more free space */ 941e240928fSHong Zhang /* Double the amount of total space in the list */ 942e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 943e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 944e240928fSHong Zhang } 945e240928fSHong Zhang 946e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 947e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 948e240928fSHong Zhang current_space->array += cnzi; 949e240928fSHong Zhang current_space->local_used += cnzi; 950e240928fSHong Zhang current_space->local_remaining -= cnzi; 951e240928fSHong Zhang 952e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 953e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 954e240928fSHong Zhang } 955e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 956e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 957e240928fSHong Zhang } 958e240928fSHong Zhang 959e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 960e240928fSHong Zhang /* For now, we will recompute what is needed. */ 961e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 962e240928fSHong Zhang } 963e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 964e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 965e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 966e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 967e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 968e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 969e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 970e240928fSHong Zhang 971e240928fSHong Zhang /* Allocate space for ca */ 972e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 973e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 974e240928fSHong Zhang 975e240928fSHong Zhang /* put together the new matrix */ 976e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 977e240928fSHong Zhang 978e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 979e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 980e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 981e240928fSHong Zhang c->freedata = PETSC_TRUE; 982e240928fSHong Zhang c->nonew = 0; 983e240928fSHong Zhang 984e240928fSHong Zhang /* Clean up. */ 985e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 986a61c8c0fSHong Zhang 987e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 988e240928fSHong Zhang PetscFunctionReturn(0); 989e240928fSHong Zhang } 990