1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78a61c8c0fSHong Zhang #undef __FUNCT__ 79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81a61c8c0fSHong Zhang { 82a61c8c0fSHong Zhang PetscErrorCode ierr; 8332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 84a61c8c0fSHong Zhang PetscObjectContainer container; 85a61c8c0fSHong Zhang 86a61c8c0fSHong Zhang PetscFunctionBegin; 87a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88a61c8c0fSHong Zhang if (container) { 89a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 9032fba14fSHong Zhang ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); 9132fba14fSHong Zhang ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr); 9201b7ae99SHong Zhang ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr); 9301b7ae99SHong Zhang ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr); 94a61c8c0fSHong Zhang 95a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 9632fba14fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr); 97a61c8c0fSHong Zhang } 98a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 99a61c8c0fSHong Zhang 100a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 101a61c8c0fSHong Zhang PetscFunctionReturn(0); 102a61c8c0fSHong Zhang } 103a61c8c0fSHong Zhang 104eb9c0419SKris Buschelman #undef __FUNCT__ 105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 107ff134f7aSHong Zhang { 108ff134f7aSHong Zhang PetscErrorCode ierr; 10932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 110a61c8c0fSHong Zhang PetscObjectContainer container; 111b90dcfe3SHong Zhang 112b90dcfe3SHong Zhang PetscFunctionBegin; 113b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1144c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 11532fba14fSHong Zhang ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr); 11632fba14fSHong Zhang 117e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 11801b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 119a61c8c0fSHong Zhang 120e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 12132fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 122e240928fSHong Zhang 123a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 124a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 125a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 126a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 127a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 128e240928fSHong Zhang 12932fba14fSHong Zhang /* now, compute symbolic local P^T*A*P */ 130a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 131a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 132a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 133a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 134a61c8c0fSHong Zhang if (container) { 135a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 136a61c8c0fSHong Zhang } else { 137a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 138a61c8c0fSHong Zhang } 139a61c8c0fSHong Zhang 140a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 14101b7ae99SHong Zhang ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr); 142a61c8c0fSHong Zhang 143a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 14432fba14fSHong Zhang ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr); 14532fba14fSHong Zhang 146a61c8c0fSHong Zhang } else { 147a61c8c0fSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 148a61c8c0fSHong Zhang } 14932fba14fSHong Zhang /* now, compute numeric local P^T*A*P */ 150a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 151a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 152a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 153a61c8c0fSHong Zhang 154a61c8c0fSHong Zhang PetscFunctionReturn(0); 155a61c8c0fSHong Zhang } 156a61c8c0fSHong Zhang 157a61c8c0fSHong Zhang #undef __FUNCT__ 158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 160a61c8c0fSHong Zhang { 161a61c8c0fSHong Zhang PetscErrorCode ierr; 162a61c8c0fSHong Zhang Mat C_seq; 16332fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 164a61c8c0fSHong Zhang PetscObjectContainer container; 165a61c8c0fSHong Zhang 166a61c8c0fSHong Zhang PetscFunctionBegin; 167a61c8c0fSHong Zhang 168a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 169a61c8c0fSHong Zhang if (container) { 170a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 171a61c8c0fSHong Zhang } else { 172a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 173a61c8c0fSHong Zhang } 17425616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 17532fba14fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr); 176a61c8c0fSHong Zhang 177b90dcfe3SHong Zhang /* add C_seq into mpi C */ 178a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 179b90dcfe3SHong Zhang 180ff134f7aSHong Zhang PetscFunctionReturn(0); 181ff134f7aSHong Zhang } 182ff134f7aSHong Zhang 183ff134f7aSHong Zhang #undef __FUNCT__ 184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 186ff134f7aSHong Zhang { 187b90dcfe3SHong Zhang PetscErrorCode ierr; 188671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 18932fba14fSHong Zhang Mat_MatMatMultMPI *ptap; 190a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 191ff134f7aSHong Zhang 192ff134f7aSHong Zhang PetscFunctionBegin; 193a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 194a61c8c0fSHong Zhang if (cont_merge) { 195a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 1967f79fd58SMatthew Knepley } else { 197a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 198671beff6SHong Zhang } 199a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 200a61c8c0fSHong Zhang if (cont_ptap) { 201a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 202a61c8c0fSHong Zhang } else { 203a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 204a61c8c0fSHong Zhang } 20532fba14fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr); 206a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 207ff134f7aSHong Zhang PetscFunctionReturn(0); 208ff134f7aSHong Zhang } 209ff134f7aSHong Zhang 210ff134f7aSHong Zhang #undef __FUNCT__ 2119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 212dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 2139af31e4aSHong Zhang { 214dfbe8321SBarry Smith PetscErrorCode ierr; 215b1d57f15SBarry Smith 2169af31e4aSHong Zhang PetscFunctionBegin; 2179af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 218d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2199af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 220d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2219af31e4aSHong Zhang } 222d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2239af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 224d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2259af31e4aSHong Zhang PetscFunctionReturn(0); 2269af31e4aSHong Zhang } 2279af31e4aSHong Zhang 2286849ba73SBarry Smith /* 2299af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2304d3841fdSKris Buschelman 2314d3841fdSKris Buschelman Collective on Mat 2324d3841fdSKris Buschelman 2334d3841fdSKris Buschelman Input Parameters: 2344d3841fdSKris Buschelman + A - the matrix 2354d3841fdSKris Buschelman - P - the projection matrix 2364d3841fdSKris Buschelman 2374d3841fdSKris Buschelman Output Parameters: 2384d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2394d3841fdSKris Buschelman 2404d3841fdSKris Buschelman Notes: 2414d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2424d3841fdSKris Buschelman 2434d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2444d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2459af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2464d3841fdSKris Buschelman 2474d3841fdSKris Buschelman Level: intermediate 2484d3841fdSKris Buschelman 2499af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2506849ba73SBarry Smith */ 251e240928fSHong Zhang #undef __FUNCT__ 252e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 25355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 25455a3bba9SHong Zhang { 255dfbe8321SBarry Smith PetscErrorCode ierr; 256534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 257534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 258eb9c0419SKris Buschelman 259eb9c0419SKris Buschelman PetscFunctionBegin; 260eb9c0419SKris Buschelman 2614482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 262c9780b6fSBarry Smith PetscValidType(A,1); 263eb9c0419SKris Buschelman MatPreallocated(A); 264eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 265eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 266eb9c0419SKris Buschelman 2674482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 268c9780b6fSBarry Smith PetscValidType(P,2); 269eb9c0419SKris Buschelman MatPreallocated(P); 270eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 271eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 272eb9c0419SKris Buschelman 2734482741eSBarry Smith PetscValidPointer(C,3); 2744482741eSBarry Smith 275eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 276eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 277eb9c0419SKris Buschelman 278534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 279534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 280534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 281534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 282534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 283534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 284534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2854d3841fdSKris Buschelman 286534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 287534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 288534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 289eb9c0419SKris Buschelman 290eb9c0419SKris Buschelman PetscFunctionReturn(0); 291eb9c0419SKris Buschelman } 292eb9c0419SKris Buschelman 293f747e1aeSHong Zhang typedef struct { 294f747e1aeSHong Zhang Mat symAP; 295f747e1aeSHong Zhang } Mat_PtAPstruct; 296f747e1aeSHong Zhang 29778a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 29878a80504SBarry Smith 299f747e1aeSHong Zhang #undef __FUNCT__ 300f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 301f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 302f747e1aeSHong Zhang { 303f4a850bbSBarry Smith PetscErrorCode ierr; 304f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 305f747e1aeSHong Zhang 306f747e1aeSHong Zhang PetscFunctionBegin; 307f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 308f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 30978a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 310f747e1aeSHong Zhang PetscFunctionReturn(0); 311f747e1aeSHong Zhang } 312f747e1aeSHong Zhang 313eb9c0419SKris Buschelman #undef __FUNCT__ 3149af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 31555a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 31655a3bba9SHong Zhang { 317dfbe8321SBarry Smith PetscErrorCode ierr; 318d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 319d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 32055a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 321b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 32255a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 323b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 324d20bfe6fSHong Zhang MatScalar *ca; 32555a3bba9SHong Zhang PetscBT lnkbt; 326eb9c0419SKris Buschelman 327eb9c0419SKris Buschelman PetscFunctionBegin; 328d20bfe6fSHong Zhang /* Get ij structure of P^T */ 329eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 330d20bfe6fSHong Zhang ptJ=ptj; 331eb9c0419SKris Buschelman 332d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 333d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 33455a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 335d20bfe6fSHong Zhang ci[0] = 0; 336eb9c0419SKris Buschelman 33755a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 33855a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 339d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 34055a3bba9SHong Zhang 34155a3bba9SHong Zhang /* create and initialize a linked list */ 34255a3bba9SHong Zhang nlnk = pn+1; 34355a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 344eb9c0419SKris Buschelman 345d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 346d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 347d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 348d20bfe6fSHong Zhang current_space = free_space; 349d20bfe6fSHong Zhang 350d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 351d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 352d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 353d20bfe6fSHong Zhang ptanzi = 0; 354d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 355d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 356d20bfe6fSHong Zhang arow = *ptJ++; 357d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 358d20bfe6fSHong Zhang ajj = aj + ai[arow]; 359d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 360d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 361d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 362d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 363d20bfe6fSHong Zhang } 364d20bfe6fSHong Zhang } 365d20bfe6fSHong Zhang } 366d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 367d20bfe6fSHong Zhang ptaj = ptasparserow; 368d20bfe6fSHong Zhang cnzi = 0; 369d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 370d20bfe6fSHong Zhang prow = *ptaj++; 371d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 372d20bfe6fSHong Zhang pjj = pj + pi[prow]; 37355a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 37455a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 37555a3bba9SHong Zhang cnzi += nlnk; 376d20bfe6fSHong Zhang } 377d20bfe6fSHong Zhang 378d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 379d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 380d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 381d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 382d20bfe6fSHong Zhang } 383d20bfe6fSHong Zhang 384d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 38555a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 386d20bfe6fSHong Zhang current_space->array += cnzi; 387d20bfe6fSHong Zhang current_space->local_used += cnzi; 388d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 389d20bfe6fSHong Zhang 390d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 391d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 392d20bfe6fSHong Zhang } 393d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 394d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 395d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 396d20bfe6fSHong Zhang } 397d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 398d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 399d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 40055a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 401d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 402d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 40355a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 404d20bfe6fSHong Zhang 405d20bfe6fSHong Zhang /* Allocate space for ca */ 406d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 407d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 408d20bfe6fSHong Zhang 409d20bfe6fSHong Zhang /* put together the new matrix */ 410d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 411d20bfe6fSHong Zhang 412d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 413d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 414d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 415d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 416d20bfe6fSHong Zhang c->nonew = 0; 417d20bfe6fSHong Zhang 418d20bfe6fSHong Zhang /* Clean up. */ 419d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 420eb9c0419SKris Buschelman 421eb9c0419SKris Buschelman PetscFunctionReturn(0); 422eb9c0419SKris Buschelman } 423eb9c0419SKris Buschelman 4243985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 4253985e5eaSKris Buschelman EXTERN_C_BEGIN 4263985e5eaSKris Buschelman #undef __FUNCT__ 4279af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 42855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 42955a3bba9SHong Zhang { 4305c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 431dfbe8321SBarry Smith PetscErrorCode ierr; 4323985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4333985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4343985e5eaSKris Buschelman Mat P=pp->AIJ; 4353985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 436b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 437b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 438b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 439b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4403985e5eaSKris Buschelman MatScalar *ca; 4413985e5eaSKris Buschelman 4423985e5eaSKris Buschelman PetscFunctionBegin; 4433985e5eaSKris Buschelman /* Start timer */ 4449af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4453985e5eaSKris Buschelman 4463985e5eaSKris Buschelman /* Get ij structure of P^T */ 4473985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4483985e5eaSKris Buschelman 4493985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4503985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 451b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4523985e5eaSKris Buschelman ci[0] = 0; 4533985e5eaSKris Buschelman 454b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 455b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4563985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4573985e5eaSKris Buschelman denserow = ptasparserow + an; 4583985e5eaSKris Buschelman sparserow = denserow + pn; 4593985e5eaSKris Buschelman 4603985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4613985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4623985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4633985e5eaSKris Buschelman current_space = free_space; 4643985e5eaSKris Buschelman 4653985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4663985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4673985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4683985e5eaSKris Buschelman ptanzi = 0; 4693985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4703985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4713985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4723985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4733985e5eaSKris Buschelman arow = ptJ[j] + dof; 4743985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4753985e5eaSKris Buschelman ajj = aj + ai[arow]; 4763985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4773985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4783985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4793985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4803985e5eaSKris Buschelman } 4813985e5eaSKris Buschelman } 4823985e5eaSKris Buschelman } 4833985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4843985e5eaSKris Buschelman ptaj = ptasparserow; 4853985e5eaSKris Buschelman cnzi = 0; 4863985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 487fe05a634SKris Buschelman pdof = *ptaj%dof; 4883985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4893985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4903985e5eaSKris Buschelman pjj = pj + pi[prow]; 4913985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 492fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 493fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 494fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4953985e5eaSKris Buschelman } 4963985e5eaSKris Buschelman } 4973985e5eaSKris Buschelman } 4983985e5eaSKris Buschelman 4993985e5eaSKris Buschelman /* sort sparserow */ 5003985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 5013985e5eaSKris Buschelman 5023985e5eaSKris Buschelman /* If free space is not available, make more free space */ 5033985e5eaSKris Buschelman /* Double the amount of total space in the list */ 5043985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 5053985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5063985e5eaSKris Buschelman } 5073985e5eaSKris Buschelman 5083985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 509b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 5103985e5eaSKris Buschelman current_space->array += cnzi; 5113985e5eaSKris Buschelman current_space->local_used += cnzi; 5123985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 5133985e5eaSKris Buschelman 5143985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 5153985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 5163985e5eaSKris Buschelman } 5173985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 5183985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 5193985e5eaSKris Buschelman } 5203985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 5213985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 5223985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 5233985e5eaSKris Buschelman } 5243985e5eaSKris Buschelman } 5253985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 5263985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 5273985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 528b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5293985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5303985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5313985e5eaSKris Buschelman 5323985e5eaSKris Buschelman /* Allocate space for ca */ 5333985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5343985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5353985e5eaSKris Buschelman 5363985e5eaSKris Buschelman /* put together the new matrix */ 5373985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5383985e5eaSKris Buschelman 5393985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5403985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5413985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5423985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5433985e5eaSKris Buschelman c->nonew = 0; 5443985e5eaSKris Buschelman 5453985e5eaSKris Buschelman /* Clean up. */ 5463985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5473985e5eaSKris Buschelman 5489af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5493985e5eaSKris Buschelman PetscFunctionReturn(0); 5503985e5eaSKris Buschelman } 5513985e5eaSKris Buschelman EXTERN_C_END 5523985e5eaSKris Buschelman 5536849ba73SBarry Smith /* 5549af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5554d3841fdSKris Buschelman 5564d3841fdSKris Buschelman Collective on Mat 5574d3841fdSKris Buschelman 5584d3841fdSKris Buschelman Input Parameters: 5594d3841fdSKris Buschelman + A - the matrix 5604d3841fdSKris Buschelman - P - the projection matrix 5614d3841fdSKris Buschelman 5624d3841fdSKris Buschelman Output Parameters: 5634d3841fdSKris Buschelman . C - the product matrix 5644d3841fdSKris Buschelman 5654d3841fdSKris Buschelman Notes: 5669af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5674d3841fdSKris Buschelman the user using MatDeatroy(). 5684d3841fdSKris Buschelman 569170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 570170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5714d3841fdSKris Buschelman 5724d3841fdSKris Buschelman Level: intermediate 5734d3841fdSKris Buschelman 5749af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5756849ba73SBarry Smith */ 576e240928fSHong Zhang #undef __FUNCT__ 577e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 57855a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 57955a3bba9SHong Zhang { 580dfbe8321SBarry Smith PetscErrorCode ierr; 581534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 582534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 583eb9c0419SKris Buschelman 584eb9c0419SKris Buschelman PetscFunctionBegin; 585eb9c0419SKris Buschelman 5864482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 587c9780b6fSBarry Smith PetscValidType(A,1); 588eb9c0419SKris Buschelman MatPreallocated(A); 589eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 590eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 591eb9c0419SKris Buschelman 5924482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 593c9780b6fSBarry Smith PetscValidType(P,2); 594eb9c0419SKris Buschelman MatPreallocated(P); 595eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 596eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 597eb9c0419SKris Buschelman 5984482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 599c9780b6fSBarry Smith PetscValidType(C,3); 600eb9c0419SKris Buschelman MatPreallocated(C); 601eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 602eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 603eb9c0419SKris Buschelman 604eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 605eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 606eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 607eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 608eb9c0419SKris Buschelman 609534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 610534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 611534c1384SKris Buschelman fA = A->ops->ptapnumeric; 612534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 613534c1384SKris Buschelman fP = P->ops->ptapnumeric; 614534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 615534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 6164d3841fdSKris Buschelman 617534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 618534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 619534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 620eb9c0419SKris Buschelman 621eb9c0419SKris Buschelman PetscFunctionReturn(0); 622eb9c0419SKris Buschelman } 623eb9c0419SKris Buschelman 624eb9c0419SKris Buschelman #undef __FUNCT__ 6259af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 626dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 627dfbe8321SBarry Smith { 628dfbe8321SBarry Smith PetscErrorCode ierr; 629b1d57f15SBarry Smith PetscInt flops=0; 630d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 631d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 632d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 633b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 634b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 635b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 636b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 637d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 638eb9c0419SKris Buschelman 639eb9c0419SKris Buschelman PetscFunctionBegin; 640d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 641b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 642b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 643eb9c0419SKris Buschelman 644b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 645d20bfe6fSHong Zhang apjdense = apj + cn; 646d20bfe6fSHong Zhang 647d20bfe6fSHong Zhang /* Clear old values in C */ 648d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 649d20bfe6fSHong Zhang 650d20bfe6fSHong Zhang for (i=0;i<am;i++) { 651d20bfe6fSHong Zhang /* Form sparse row of A*P */ 652d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 653d20bfe6fSHong Zhang apnzj = 0; 654d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 655d20bfe6fSHong Zhang prow = *aj++; 656d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 657d20bfe6fSHong Zhang pjj = pj + pi[prow]; 658d20bfe6fSHong Zhang paj = pa + pi[prow]; 659d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 660d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 661d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 662d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 663d20bfe6fSHong Zhang } 664d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 665d20bfe6fSHong Zhang } 666d20bfe6fSHong Zhang flops += 2*pnzj; 667d20bfe6fSHong Zhang aa++; 668d20bfe6fSHong Zhang } 669d20bfe6fSHong Zhang 670d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 671d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 672d20bfe6fSHong Zhang 673d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 674d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 675d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 676d20bfe6fSHong Zhang nextap = 0; 677d20bfe6fSHong Zhang crow = *pJ++; 678d20bfe6fSHong Zhang cjj = cj + ci[crow]; 679d20bfe6fSHong Zhang caj = ca + ci[crow]; 680d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 681d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 682d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 683d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 684d20bfe6fSHong Zhang } 685d20bfe6fSHong Zhang } 686d20bfe6fSHong Zhang flops += 2*apnzj; 687d20bfe6fSHong Zhang pA++; 688d20bfe6fSHong Zhang } 689d20bfe6fSHong Zhang 690d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 691d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 692d20bfe6fSHong Zhang apa[apj[j]] = 0.; 693d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 694d20bfe6fSHong Zhang } 695d20bfe6fSHong Zhang } 696d20bfe6fSHong Zhang 697d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 698d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 699d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 700d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 701d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 702d20bfe6fSHong Zhang 703eb9c0419SKris Buschelman PetscFunctionReturn(0); 704eb9c0419SKris Buschelman } 7050e36024fSHong Zhang 706a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 707e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 708e240928fSHong Zhang #undef __FUNCT__ 709e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 710e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 711e240928fSHong Zhang { 712e240928fSHong Zhang PetscErrorCode ierr; 713e240928fSHong Zhang PetscInt flops=0; 714e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 715e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 716e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 717e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 718e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 719e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 720e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 721e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 722e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 723e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 724e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 725e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 726e240928fSHong Zhang 727e240928fSHong Zhang PetscFunctionBegin; 728e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 729a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 730e240928fSHong Zhang } 731a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 732e240928fSHong Zhang 733e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 734e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 735e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 736e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 737e240928fSHong Zhang apjdense = apj + cn; 738e240928fSHong Zhang 739e240928fSHong Zhang /* Clear old values in C */ 740e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 741e240928fSHong Zhang 742e240928fSHong Zhang for (i=0;i<am;i++) { 743e240928fSHong Zhang /* Form i-th sparse row of A*P */ 744e240928fSHong Zhang apnzj = 0; 745e240928fSHong Zhang /* diagonal portion of A */ 746e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 747e240928fSHong Zhang for (j=0;j<anzi;j++) { 748e240928fSHong Zhang prow = *adj; 749e240928fSHong Zhang adj++; 750e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 751e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 752e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 753e240928fSHong Zhang for (k=0;k<pnzj;k++) { 754e240928fSHong Zhang if (!apjdense[pjj[k]]) { 755e240928fSHong Zhang apjdense[pjj[k]] = -1; 756e240928fSHong Zhang apj[apnzj++] = pjj[k]; 757e240928fSHong Zhang } 758e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 759e240928fSHong Zhang } 760e240928fSHong Zhang flops += 2*pnzj; 761e240928fSHong Zhang ada++; 762e240928fSHong Zhang } 763e240928fSHong Zhang /* off-diagonal portion of A */ 764e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 765e240928fSHong Zhang for (j=0;j<anzi;j++) { 766e240928fSHong Zhang col = a->garray[*aoj]; 767e240928fSHong Zhang if (col < cstart){ 768e240928fSHong Zhang prow = *aoj; 769e240928fSHong Zhang } else if (col >= cend){ 770e240928fSHong Zhang prow = *aoj; 771e240928fSHong Zhang } else { 772e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 773e240928fSHong Zhang } 774e240928fSHong Zhang aoj++; 775e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 776e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 777e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 778e240928fSHong Zhang for (k=0;k<pnzj;k++) { 779e240928fSHong Zhang if (!apjdense[pjj[k]]) { 780e240928fSHong Zhang apjdense[pjj[k]] = -1; 781e240928fSHong Zhang apj[apnzj++] = pjj[k]; 782e240928fSHong Zhang } 783e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 784e240928fSHong Zhang } 785e240928fSHong Zhang flops += 2*pnzj; 786e240928fSHong Zhang aoa++; 787e240928fSHong Zhang } 788e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 789e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 790e240928fSHong Zhang 791e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 792e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 793e240928fSHong Zhang for (j=0;j<pnzi;j++) { 794e240928fSHong Zhang nextap = 0; 795e240928fSHong Zhang crow = *pJ++; 796e240928fSHong Zhang cjj = cj + ci[crow]; 797e240928fSHong Zhang caj = ca + ci[crow]; 798e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 799e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 800e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 801e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 802e240928fSHong Zhang } 803e240928fSHong Zhang } 804e240928fSHong Zhang flops += 2*apnzj; 805e240928fSHong Zhang pA++; 806e240928fSHong Zhang } 807e240928fSHong Zhang 808e240928fSHong Zhang /* Zero the current row info for A*P */ 809e240928fSHong Zhang for (j=0;j<apnzj;j++) { 810e240928fSHong Zhang apa[apj[j]] = 0.; 811e240928fSHong Zhang apjdense[apj[j]] = 0; 812e240928fSHong Zhang } 813e240928fSHong Zhang } 814e240928fSHong Zhang 815e240928fSHong Zhang /* Assemble the final matrix and clean up */ 816e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 817e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 819e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 820a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 821e240928fSHong Zhang PetscFunctionReturn(0); 822e240928fSHong Zhang } 823e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 824e240928fSHong Zhang #undef __FUNCT__ 825e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 826e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 827e240928fSHong Zhang { 828e240928fSHong Zhang PetscErrorCode ierr; 829e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 830e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 831e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 832e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 833e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 834e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 835*6abd8857SHong Zhang PetscInt aN=A->N,am=A->m,pN=P_loc->N; 836e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 837e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 838e240928fSHong Zhang MatScalar *ca; 839e240928fSHong Zhang PetscBT lnkbt; 840e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 841e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 842e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 843e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 844e240928fSHong Zhang 845e240928fSHong Zhang PetscFunctionBegin; 846e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 847e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 848e240928fSHong Zhang } 849e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 850e240928fSHong Zhang 851e240928fSHong Zhang prend = prstart + pm; 852e240928fSHong Zhang 853e240928fSHong Zhang /* get ij structure of P_loc^T */ 854e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 855e240928fSHong Zhang ptJ=ptj; 856e240928fSHong Zhang 857e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 858e240928fSHong Zhang /* free space for accumulating nonzero column info */ 859e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 860e240928fSHong Zhang ci[0] = 0; 861e240928fSHong Zhang 862*6abd8857SHong Zhang ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 863*6abd8857SHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr); 864*6abd8857SHong Zhang ptasparserow_loc = ptadenserow_loc + aN; 865*6abd8857SHong Zhang ptadenserow_oth = ptasparserow_loc + aN; 866*6abd8857SHong Zhang ptasparserow_oth = ptadenserow_oth + aN; 867e240928fSHong Zhang 868e240928fSHong Zhang /* create and initialize a linked list */ 869e240928fSHong Zhang nlnk = pN+1; 870e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 871e240928fSHong Zhang 872*6abd8857SHong Zhang /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */ 873e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 874e240928fSHong Zhang nnz = adi[am] + aoi[am]; 875*6abd8857SHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space); 876e240928fSHong Zhang current_space = free_space; 877e240928fSHong Zhang 878e240928fSHong Zhang /* determine symbolic info for each row of C: */ 879e240928fSHong Zhang for (i=0;i<pN;i++) { 880e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 881e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 882e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 883e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 884e240928fSHong Zhang arow = *ptJ++; 885e240928fSHong Zhang /* diagonal portion of A */ 886e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 887e240928fSHong Zhang ajj = adj + adi[arow]; 888e240928fSHong Zhang for (k=0;k<anzj;k++) { 889e240928fSHong Zhang col = ajj[k]+prstart; 890e240928fSHong Zhang if (!ptadenserow_loc[col]) { 891e240928fSHong Zhang ptadenserow_loc[col] = -1; 892e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 893e240928fSHong Zhang } 894e240928fSHong Zhang } 895e240928fSHong Zhang /* off-diagonal portion of A */ 896e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 897e240928fSHong Zhang ajj = aoj + aoi[arow]; 898e240928fSHong Zhang for (k=0;k<anzj;k++) { 899e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 900e240928fSHong Zhang if (col < cstart){ 901e240928fSHong Zhang col = ajj[k]; 902e240928fSHong Zhang } else if (col >= cend){ 903e240928fSHong Zhang col = ajj[k] + pm; 904e240928fSHong Zhang } else { 905e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 906e240928fSHong Zhang } 907e240928fSHong Zhang if (!ptadenserow_oth[col]) { 908e240928fSHong Zhang ptadenserow_oth[col] = -1; 909e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 910e240928fSHong Zhang } 911e240928fSHong Zhang } 912e240928fSHong Zhang } 913e240928fSHong Zhang 914e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 915e240928fSHong Zhang cnzi = 0; 916e240928fSHong Zhang /* rows of P_loc */ 917e240928fSHong Zhang ptaj = ptasparserow_loc; 918e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 919e240928fSHong Zhang prow = *ptaj++; 920e240928fSHong Zhang prow -= prstart; /* rm */ 921e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 922e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 923e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 924e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 925e240928fSHong Zhang cnzi += nlnk; 926e240928fSHong Zhang } 927e240928fSHong Zhang /* rows of P_oth */ 928e240928fSHong Zhang ptaj = ptasparserow_oth; 929e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 930e240928fSHong Zhang prow = *ptaj++; 931e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 932e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 933e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 934e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 935e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 936e240928fSHong Zhang cnzi += nlnk; 937e240928fSHong Zhang } 938e240928fSHong Zhang 939e240928fSHong Zhang /* If free space is not available, make more free space */ 940e240928fSHong Zhang /* Double the amount of total space in the list */ 941e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 942e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 943e240928fSHong Zhang } 944e240928fSHong Zhang 945e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 946e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 947e240928fSHong Zhang current_space->array += cnzi; 948e240928fSHong Zhang current_space->local_used += cnzi; 949e240928fSHong Zhang current_space->local_remaining -= cnzi; 950e240928fSHong Zhang 951e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 952e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 953e240928fSHong Zhang } 954e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 955e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 956e240928fSHong Zhang } 957e240928fSHong Zhang 958e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 959e240928fSHong Zhang /* For now, we will recompute what is needed. */ 960e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 961e240928fSHong Zhang } 962e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 963e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 964e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 965e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 966e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 967e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 968e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 969e240928fSHong Zhang 970e240928fSHong Zhang /* Allocate space for ca */ 971e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 972e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 973e240928fSHong Zhang 974e240928fSHong Zhang /* put together the new matrix */ 975e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 976e240928fSHong Zhang 977e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 978e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 979e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 980e240928fSHong Zhang c->freedata = PETSC_TRUE; 981e240928fSHong Zhang c->nonew = 0; 982e240928fSHong Zhang 983e240928fSHong Zhang /* Clean up. */ 984e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 985a61c8c0fSHong Zhang 986e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 987e240928fSHong Zhang PetscFunctionReturn(0); 988e240928fSHong Zhang } 989