1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 955a3bba9SHong Zhang #include "petscbt.h" 10eb9c0419SKris Buschelman 11eb9c0419SKris Buschelman #undef __FUNCT__ 129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 134d3841fdSKris Buschelman /*@ 149af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 154d3841fdSKris Buschelman 164d3841fdSKris Buschelman Collective on Mat 174d3841fdSKris Buschelman 184d3841fdSKris Buschelman Input Parameters: 194d3841fdSKris Buschelman + A - the matrix 20f747e1aeSHong Zhang . P - the projection matrix 21f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 22f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 234d3841fdSKris Buschelman 244d3841fdSKris Buschelman Output Parameters: 254d3841fdSKris Buschelman . C - the product matrix 264d3841fdSKris Buschelman 274d3841fdSKris Buschelman Notes: 284d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 294d3841fdSKris Buschelman 304d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 314d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 324d3841fdSKris Buschelman 334d3841fdSKris Buschelman Level: intermediate 344d3841fdSKris Buschelman 359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 364d3841fdSKris Buschelman @*/ 3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 3855a3bba9SHong Zhang { 39dfbe8321SBarry Smith PetscErrorCode ierr; 40534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 41534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 42eb9c0419SKris Buschelman 43eb9c0419SKris Buschelman PetscFunctionBegin; 449af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 459af31e4aSHong Zhang PetscValidType(A,1); 469af31e4aSHong Zhang MatPreallocated(A); 479af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 489af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 499af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 509af31e4aSHong Zhang PetscValidType(P,2); 519af31e4aSHong Zhang MatPreallocated(P); 529af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 539af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 549af31e4aSHong Zhang PetscValidPointer(C,3); 55534c1384SKris Buschelman 569af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 57eb9c0419SKris Buschelman 589af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 59eb9c0419SKris Buschelman 60534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 61534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 62534c1384SKris Buschelman fA = A->ops->ptap; 63534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 64534c1384SKris Buschelman fP = P->ops->ptap; 65534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 66534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 67534c1384SKris Buschelman 689af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 709af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 71eb9c0419SKris Buschelman PetscFunctionReturn(0); 72eb9c0419SKris Buschelman } 73eb9c0419SKris Buschelman 74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*); 75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C); 76e240928fSHong Zhang 77*a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 78*a61c8c0fSHong Zhang #undef __FUNCT__ 79*a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP" 80*a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A) 81*a61c8c0fSHong Zhang { 82*a61c8c0fSHong Zhang PetscErrorCode ierr; 83*a61c8c0fSHong Zhang Mat_PtAPMPI *ptap; 84*a61c8c0fSHong Zhang PetscObjectContainer container; 85*a61c8c0fSHong Zhang 86*a61c8c0fSHong Zhang PetscFunctionBegin; 87*a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 88*a61c8c0fSHong Zhang if (container) { 89*a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 90*a61c8c0fSHong Zhang 91*a61c8c0fSHong Zhang ierr = MatDestroyMatrices(1,&ptap->ploc);CHKERRQ(ierr); 92*a61c8c0fSHong Zhang ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr); 93*a61c8c0fSHong Zhang 94*a61c8c0fSHong Zhang ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 95*a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)A,"MatPtApMPI",0);CHKERRQ(ierr); 96*a61c8c0fSHong Zhang } 97*a61c8c0fSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 98*a61c8c0fSHong Zhang 99*a61c8c0fSHong Zhang ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 100*a61c8c0fSHong Zhang PetscFunctionReturn(0); 101*a61c8c0fSHong Zhang } 102*a61c8c0fSHong Zhang 103eb9c0419SKris Buschelman #undef __FUNCT__ 104ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 105ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 106ff134f7aSHong Zhang { 107ff134f7aSHong Zhang PetscErrorCode ierr; 108*a61c8c0fSHong Zhang Mat P_loc,P_oth,*ploc; 109*a61c8c0fSHong Zhang PetscInt prstart,low; 110*a61c8c0fSHong Zhang IS isrow,iscol; 111*a61c8c0fSHong Zhang Mat_PtAPMPI *ptap; 112*a61c8c0fSHong Zhang PetscObjectContainer container; 113b90dcfe3SHong Zhang 114b90dcfe3SHong Zhang PetscFunctionBegin; 115b90dcfe3SHong Zhang if (scall == MAT_INITIAL_MATRIX){ 1164c627768SHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 117e240928fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 118429d309bSHong Zhang ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr); 119*a61c8c0fSHong Zhang 120e240928fSHong Zhang /* get P_loc by taking all local rows of P */ 121e240928fSHong Zhang ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 122e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 123e240928fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 124e240928fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr); 125e240928fSHong Zhang P_loc = ploc[0]; 126e240928fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 127e240928fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 128e240928fSHong Zhang 129*a61c8c0fSHong Zhang /* attach the supporting struct to P for reuse */ 130*a61c8c0fSHong Zhang ierr = PetscNew(Mat_PtAPMPI,&ptap);CHKERRQ(ierr); 131*a61c8c0fSHong Zhang ptap->ploc = ploc; 132*a61c8c0fSHong Zhang ptap->P_oth = P_oth; 133*a61c8c0fSHong Zhang ptap->prstart = prstart; 134*a61c8c0fSHong Zhang P->ops->destroy = MatDestroy_MPIAIJ_PtAP; 135*a61c8c0fSHong Zhang ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 136*a61c8c0fSHong Zhang ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr); 137*a61c8c0fSHong Zhang ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr); 138e240928fSHong Zhang 139*a61c8c0fSHong Zhang /* now, compute symbolic product */ 140*a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */ 141*a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 142*a61c8c0fSHong Zhang } else if (scall == MAT_REUSE_MATRIX){ 143*a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 144*a61c8c0fSHong Zhang if (container) { 145*a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 146*a61c8c0fSHong Zhang } else { 147*a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 148*a61c8c0fSHong Zhang } 149*a61c8c0fSHong Zhang 150*a61c8c0fSHong Zhang /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ 151*a61c8c0fSHong Zhang /* improve! */ 152*a61c8c0fSHong Zhang ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr); 153*a61c8c0fSHong Zhang ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&ptap->P_oth);CHKERRQ(ierr); 154*a61c8c0fSHong Zhang 155*a61c8c0fSHong Zhang /* get P_loc by taking all local rows of P */ 156*a61c8c0fSHong Zhang ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr); 157*a61c8c0fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr); 158*a61c8c0fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr); 159*a61c8c0fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_REUSE_MATRIX,&ptap->ploc);CHKERRQ(ierr); 160*a61c8c0fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 161*a61c8c0fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 162*a61c8c0fSHong Zhang } else { 163*a61c8c0fSHong Zhang SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 164*a61c8c0fSHong Zhang } 165*a61c8c0fSHong Zhang /* now, compute numeric product */ 166*a61c8c0fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 167*a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr); 168*a61c8c0fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 169*a61c8c0fSHong Zhang 170*a61c8c0fSHong Zhang PetscFunctionReturn(0); 171*a61c8c0fSHong Zhang } 172*a61c8c0fSHong Zhang 173*a61c8c0fSHong Zhang #undef __FUNCT__ 174*a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ" 175*a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 176*a61c8c0fSHong Zhang { 177*a61c8c0fSHong Zhang PetscErrorCode ierr; 178*a61c8c0fSHong Zhang Mat C_seq; 179*a61c8c0fSHong Zhang Mat_PtAPMPI *ptap; 180*a61c8c0fSHong Zhang PetscObjectContainer container; 181*a61c8c0fSHong Zhang 182*a61c8c0fSHong Zhang PetscFunctionBegin; 183*a61c8c0fSHong Zhang 184*a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr); 185*a61c8c0fSHong Zhang if (container) { 186*a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr); 187*a61c8c0fSHong Zhang } else { 188*a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 189*a61c8c0fSHong Zhang } 19025616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 191*a61c8c0fSHong Zhang ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,fill,&C_seq);CHKERRQ(ierr); 192*a61c8c0fSHong Zhang 193b90dcfe3SHong Zhang /* add C_seq into mpi C */ 194*a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr); 195b90dcfe3SHong Zhang 196ff134f7aSHong Zhang PetscFunctionReturn(0); 197ff134f7aSHong Zhang } 198ff134f7aSHong Zhang 199ff134f7aSHong Zhang #undef __FUNCT__ 200e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ" 201b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) 202ff134f7aSHong Zhang { 203b90dcfe3SHong Zhang PetscErrorCode ierr; 204671beff6SHong Zhang Mat_Merge_SeqsToMPI *merge; 205*a61c8c0fSHong Zhang Mat_PtAPMPI *ptap; 206*a61c8c0fSHong Zhang PetscObjectContainer cont_merge,cont_ptap; 207ff134f7aSHong Zhang 208ff134f7aSHong Zhang PetscFunctionBegin; 209*a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr); 210*a61c8c0fSHong Zhang if (cont_merge) { 211*a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr); 2127f79fd58SMatthew Knepley } else { 213*a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container"); 214671beff6SHong Zhang } 215*a61c8c0fSHong Zhang ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr); 216*a61c8c0fSHong Zhang if (cont_ptap) { 217*a61c8c0fSHong Zhang ierr = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr); 218*a61c8c0fSHong Zhang } else { 219*a61c8c0fSHong Zhang SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 220*a61c8c0fSHong Zhang } 221*a61c8c0fSHong Zhang ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,merge->C_seq);CHKERRQ(ierr); 222*a61c8c0fSHong Zhang ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr); 223ff134f7aSHong Zhang PetscFunctionReturn(0); 224ff134f7aSHong Zhang } 225ff134f7aSHong Zhang 226ff134f7aSHong Zhang #undef __FUNCT__ 2279af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 228dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 2299af31e4aSHong Zhang { 230dfbe8321SBarry Smith PetscErrorCode ierr; 231b1d57f15SBarry Smith 2329af31e4aSHong Zhang PetscFunctionBegin; 2339af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 234d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2359af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 236d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 2379af31e4aSHong Zhang } 238d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2399af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 240d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 2419af31e4aSHong Zhang PetscFunctionReturn(0); 2429af31e4aSHong Zhang } 2439af31e4aSHong Zhang 2446849ba73SBarry Smith /* 2459af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 2464d3841fdSKris Buschelman 2474d3841fdSKris Buschelman Collective on Mat 2484d3841fdSKris Buschelman 2494d3841fdSKris Buschelman Input Parameters: 2504d3841fdSKris Buschelman + A - the matrix 2514d3841fdSKris Buschelman - P - the projection matrix 2524d3841fdSKris Buschelman 2534d3841fdSKris Buschelman Output Parameters: 2544d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 2554d3841fdSKris Buschelman 2564d3841fdSKris Buschelman Notes: 2574d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 2584d3841fdSKris Buschelman 2594d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 2604d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 2619af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 2624d3841fdSKris Buschelman 2634d3841fdSKris Buschelman Level: intermediate 2644d3841fdSKris Buschelman 2659af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 2666849ba73SBarry Smith */ 267e240928fSHong Zhang #undef __FUNCT__ 268e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 26955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) 27055a3bba9SHong Zhang { 271dfbe8321SBarry Smith PetscErrorCode ierr; 272534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 273534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 274eb9c0419SKris Buschelman 275eb9c0419SKris Buschelman PetscFunctionBegin; 276eb9c0419SKris Buschelman 2774482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 278c9780b6fSBarry Smith PetscValidType(A,1); 279eb9c0419SKris Buschelman MatPreallocated(A); 280eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 281eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 282eb9c0419SKris Buschelman 2834482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 284c9780b6fSBarry Smith PetscValidType(P,2); 285eb9c0419SKris Buschelman MatPreallocated(P); 286eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 287eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 288eb9c0419SKris Buschelman 2894482741eSBarry Smith PetscValidPointer(C,3); 2904482741eSBarry Smith 291eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 292eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 293eb9c0419SKris Buschelman 294534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 295534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 296534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 297534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 298534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 299534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 300534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 3014d3841fdSKris Buschelman 302534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 303534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 304534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 305eb9c0419SKris Buschelman 306eb9c0419SKris Buschelman PetscFunctionReturn(0); 307eb9c0419SKris Buschelman } 308eb9c0419SKris Buschelman 309f747e1aeSHong Zhang typedef struct { 310f747e1aeSHong Zhang Mat symAP; 311f747e1aeSHong Zhang } Mat_PtAPstruct; 312f747e1aeSHong Zhang 31378a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 31478a80504SBarry Smith 315f747e1aeSHong Zhang #undef __FUNCT__ 316f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 317f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 318f747e1aeSHong Zhang { 319f4a850bbSBarry Smith PetscErrorCode ierr; 320f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 321f747e1aeSHong Zhang 322f747e1aeSHong Zhang PetscFunctionBegin; 323f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 324f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 32578a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 326f747e1aeSHong Zhang PetscFunctionReturn(0); 327f747e1aeSHong Zhang } 328f747e1aeSHong Zhang 329eb9c0419SKris Buschelman #undef __FUNCT__ 3309af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 33155a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) 33255a3bba9SHong Zhang { 333dfbe8321SBarry Smith PetscErrorCode ierr; 334d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 335d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c; 33655a3bba9SHong Zhang PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 337b8374ebeSBarry Smith PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*ptaj; 33855a3bba9SHong Zhang PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M; 339b8374ebeSBarry Smith PetscInt i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk; 340d20bfe6fSHong Zhang MatScalar *ca; 34155a3bba9SHong Zhang PetscBT lnkbt; 342eb9c0419SKris Buschelman 343eb9c0419SKris Buschelman PetscFunctionBegin; 344d20bfe6fSHong Zhang /* Get ij structure of P^T */ 345eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 346d20bfe6fSHong Zhang ptJ=ptj; 347eb9c0419SKris Buschelman 348d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 349d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 35055a3bba9SHong Zhang ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 351d20bfe6fSHong Zhang ci[0] = 0; 352eb9c0419SKris Buschelman 35355a3bba9SHong Zhang ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 35455a3bba9SHong Zhang ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 355d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 35655a3bba9SHong Zhang 35755a3bba9SHong Zhang /* create and initialize a linked list */ 35855a3bba9SHong Zhang nlnk = pn+1; 35955a3bba9SHong Zhang ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 360eb9c0419SKris Buschelman 361d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 362d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 363d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 364d20bfe6fSHong Zhang current_space = free_space; 365d20bfe6fSHong Zhang 366d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 367d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 368d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 369d20bfe6fSHong Zhang ptanzi = 0; 370d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 371d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 372d20bfe6fSHong Zhang arow = *ptJ++; 373d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 374d20bfe6fSHong Zhang ajj = aj + ai[arow]; 375d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 376d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 377d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 378d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 379d20bfe6fSHong Zhang } 380d20bfe6fSHong Zhang } 381d20bfe6fSHong Zhang } 382d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 383d20bfe6fSHong Zhang ptaj = ptasparserow; 384d20bfe6fSHong Zhang cnzi = 0; 385d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 386d20bfe6fSHong Zhang prow = *ptaj++; 387d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 388d20bfe6fSHong Zhang pjj = pj + pi[prow]; 38955a3bba9SHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 39055a3bba9SHong Zhang ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr); 39155a3bba9SHong Zhang cnzi += nlnk; 392d20bfe6fSHong Zhang } 393d20bfe6fSHong Zhang 394d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 395d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 396d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 397d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 398d20bfe6fSHong Zhang } 399d20bfe6fSHong Zhang 400d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 40155a3bba9SHong Zhang ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 402d20bfe6fSHong Zhang current_space->array += cnzi; 403d20bfe6fSHong Zhang current_space->local_used += cnzi; 404d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 405d20bfe6fSHong Zhang 406d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 407d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 408d20bfe6fSHong Zhang } 409d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 410d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 411d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 412d20bfe6fSHong Zhang } 413d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 414d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 415d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 41655a3bba9SHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 417d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 418d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 41955a3bba9SHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 420d20bfe6fSHong Zhang 421d20bfe6fSHong Zhang /* Allocate space for ca */ 422d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 423d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 424d20bfe6fSHong Zhang 425d20bfe6fSHong Zhang /* put together the new matrix */ 426d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 427d20bfe6fSHong Zhang 428d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 429d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 430d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 431d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 432d20bfe6fSHong Zhang c->nonew = 0; 433d20bfe6fSHong Zhang 434d20bfe6fSHong Zhang /* Clean up. */ 435d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 436eb9c0419SKris Buschelman 437eb9c0419SKris Buschelman PetscFunctionReturn(0); 438eb9c0419SKris Buschelman } 439eb9c0419SKris Buschelman 4403985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 4413985e5eaSKris Buschelman EXTERN_C_BEGIN 4423985e5eaSKris Buschelman #undef __FUNCT__ 4439af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 44455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) 44555a3bba9SHong Zhang { 4465c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 447dfbe8321SBarry Smith PetscErrorCode ierr; 4483985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4493985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 4503985e5eaSKris Buschelman Mat P=pp->AIJ; 4513985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 452b1d57f15SBarry Smith PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 453b1d57f15SBarry Smith PetscInt *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 454b1d57f15SBarry Smith PetscInt an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 455b1d57f15SBarry Smith PetscInt i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 4563985e5eaSKris Buschelman MatScalar *ca; 4573985e5eaSKris Buschelman 4583985e5eaSKris Buschelman PetscFunctionBegin; 4593985e5eaSKris Buschelman /* Start timer */ 4609af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4613985e5eaSKris Buschelman 4623985e5eaSKris Buschelman /* Get ij structure of P^T */ 4633985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4643985e5eaSKris Buschelman 4653985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 4663985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 467b1d57f15SBarry Smith ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4683985e5eaSKris Buschelman ci[0] = 0; 4693985e5eaSKris Buschelman 470b1d57f15SBarry Smith ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 471b1d57f15SBarry Smith ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 4723985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 4733985e5eaSKris Buschelman denserow = ptasparserow + an; 4743985e5eaSKris Buschelman sparserow = denserow + pn; 4753985e5eaSKris Buschelman 4763985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 4773985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 4783985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 4793985e5eaSKris Buschelman current_space = free_space; 4803985e5eaSKris Buschelman 4813985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 4823985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 4833985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 4843985e5eaSKris Buschelman ptanzi = 0; 4853985e5eaSKris Buschelman ptJ = ptj + pti[i]; 4863985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 4873985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 4883985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 4893985e5eaSKris Buschelman arow = ptJ[j] + dof; 4903985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 4913985e5eaSKris Buschelman ajj = aj + ai[arow]; 4923985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 4933985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 4943985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 4953985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 4963985e5eaSKris Buschelman } 4973985e5eaSKris Buschelman } 4983985e5eaSKris Buschelman } 4993985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 5003985e5eaSKris Buschelman ptaj = ptasparserow; 5013985e5eaSKris Buschelman cnzi = 0; 5023985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 503fe05a634SKris Buschelman pdof = *ptaj%dof; 5043985e5eaSKris Buschelman prow = (*ptaj++)/dof; 5053985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 5063985e5eaSKris Buschelman pjj = pj + pi[prow]; 5073985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 508fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 509fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 510fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 5113985e5eaSKris Buschelman } 5123985e5eaSKris Buschelman } 5133985e5eaSKris Buschelman } 5143985e5eaSKris Buschelman 5153985e5eaSKris Buschelman /* sort sparserow */ 5163985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 5173985e5eaSKris Buschelman 5183985e5eaSKris Buschelman /* If free space is not available, make more free space */ 5193985e5eaSKris Buschelman /* Double the amount of total space in the list */ 5203985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 5213985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5223985e5eaSKris Buschelman } 5233985e5eaSKris Buschelman 5243985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 525b1d57f15SBarry Smith ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 5263985e5eaSKris Buschelman current_space->array += cnzi; 5273985e5eaSKris Buschelman current_space->local_used += cnzi; 5283985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 5293985e5eaSKris Buschelman 5303985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 5313985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 5323985e5eaSKris Buschelman } 5333985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 5343985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 5353985e5eaSKris Buschelman } 5363985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 5373985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 5383985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 5393985e5eaSKris Buschelman } 5403985e5eaSKris Buschelman } 5413985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 5423985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 5433985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 544b1d57f15SBarry Smith ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5453985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 5463985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 5473985e5eaSKris Buschelman 5483985e5eaSKris Buschelman /* Allocate space for ca */ 5493985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 5503985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 5513985e5eaSKris Buschelman 5523985e5eaSKris Buschelman /* put together the new matrix */ 5533985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 5543985e5eaSKris Buschelman 5553985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5563985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 5573985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 5583985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 5593985e5eaSKris Buschelman c->nonew = 0; 5603985e5eaSKris Buschelman 5613985e5eaSKris Buschelman /* Clean up. */ 5623985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 5633985e5eaSKris Buschelman 5649af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 5653985e5eaSKris Buschelman PetscFunctionReturn(0); 5663985e5eaSKris Buschelman } 5673985e5eaSKris Buschelman EXTERN_C_END 5683985e5eaSKris Buschelman 5696849ba73SBarry Smith /* 5709af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 5714d3841fdSKris Buschelman 5724d3841fdSKris Buschelman Collective on Mat 5734d3841fdSKris Buschelman 5744d3841fdSKris Buschelman Input Parameters: 5754d3841fdSKris Buschelman + A - the matrix 5764d3841fdSKris Buschelman - P - the projection matrix 5774d3841fdSKris Buschelman 5784d3841fdSKris Buschelman Output Parameters: 5794d3841fdSKris Buschelman . C - the product matrix 5804d3841fdSKris Buschelman 5814d3841fdSKris Buschelman Notes: 5829af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 5834d3841fdSKris Buschelman the user using MatDeatroy(). 5844d3841fdSKris Buschelman 585170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 586170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 5874d3841fdSKris Buschelman 5884d3841fdSKris Buschelman Level: intermediate 5894d3841fdSKris Buschelman 5909af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 5916849ba73SBarry Smith */ 592e240928fSHong Zhang #undef __FUNCT__ 593e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 59455a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) 59555a3bba9SHong Zhang { 596dfbe8321SBarry Smith PetscErrorCode ierr; 597534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 598534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 599eb9c0419SKris Buschelman 600eb9c0419SKris Buschelman PetscFunctionBegin; 601eb9c0419SKris Buschelman 6024482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 603c9780b6fSBarry Smith PetscValidType(A,1); 604eb9c0419SKris Buschelman MatPreallocated(A); 605eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 606eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 607eb9c0419SKris Buschelman 6084482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 609c9780b6fSBarry Smith PetscValidType(P,2); 610eb9c0419SKris Buschelman MatPreallocated(P); 611eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 612eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 613eb9c0419SKris Buschelman 6144482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 615c9780b6fSBarry Smith PetscValidType(C,3); 616eb9c0419SKris Buschelman MatPreallocated(C); 617eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 618eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 619eb9c0419SKris Buschelman 620eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 621eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 622eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 623eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 624eb9c0419SKris Buschelman 625534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 626534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 627534c1384SKris Buschelman fA = A->ops->ptapnumeric; 628534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 629534c1384SKris Buschelman fP = P->ops->ptapnumeric; 630534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 631534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 6324d3841fdSKris Buschelman 633534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 634534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 635534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 636eb9c0419SKris Buschelman 637eb9c0419SKris Buschelman PetscFunctionReturn(0); 638eb9c0419SKris Buschelman } 639eb9c0419SKris Buschelman 640eb9c0419SKris Buschelman #undef __FUNCT__ 6419af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 642dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 643dfbe8321SBarry Smith { 644dfbe8321SBarry Smith PetscErrorCode ierr; 645b1d57f15SBarry Smith PetscInt flops=0; 646d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 647d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 648d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 649b1d57f15SBarry Smith PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 650b1d57f15SBarry Smith PetscInt *ci=c->i,*cj=c->j,*cjj; 651b1d57f15SBarry Smith PetscInt am=A->M,cn=C->N,cm=C->M; 652b1d57f15SBarry Smith PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 653d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 654eb9c0419SKris Buschelman 655eb9c0419SKris Buschelman PetscFunctionBegin; 656d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 657b1d57f15SBarry Smith ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 658b1d57f15SBarry Smith ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 659eb9c0419SKris Buschelman 660b1d57f15SBarry Smith apj = (PetscInt *)(apa + cn); 661d20bfe6fSHong Zhang apjdense = apj + cn; 662d20bfe6fSHong Zhang 663d20bfe6fSHong Zhang /* Clear old values in C */ 664d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 665d20bfe6fSHong Zhang 666d20bfe6fSHong Zhang for (i=0;i<am;i++) { 667d20bfe6fSHong Zhang /* Form sparse row of A*P */ 668d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 669d20bfe6fSHong Zhang apnzj = 0; 670d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 671d20bfe6fSHong Zhang prow = *aj++; 672d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 673d20bfe6fSHong Zhang pjj = pj + pi[prow]; 674d20bfe6fSHong Zhang paj = pa + pi[prow]; 675d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 676d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 677d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 678d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 679d20bfe6fSHong Zhang } 680d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 681d20bfe6fSHong Zhang } 682d20bfe6fSHong Zhang flops += 2*pnzj; 683d20bfe6fSHong Zhang aa++; 684d20bfe6fSHong Zhang } 685d20bfe6fSHong Zhang 686d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 687d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 688d20bfe6fSHong Zhang 689d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 690d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 691d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 692d20bfe6fSHong Zhang nextap = 0; 693d20bfe6fSHong Zhang crow = *pJ++; 694d20bfe6fSHong Zhang cjj = cj + ci[crow]; 695d20bfe6fSHong Zhang caj = ca + ci[crow]; 696d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 697d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 698d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 699d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 700d20bfe6fSHong Zhang } 701d20bfe6fSHong Zhang } 702d20bfe6fSHong Zhang flops += 2*apnzj; 703d20bfe6fSHong Zhang pA++; 704d20bfe6fSHong Zhang } 705d20bfe6fSHong Zhang 706d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 707d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 708d20bfe6fSHong Zhang apa[apj[j]] = 0.; 709d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 710d20bfe6fSHong Zhang } 711d20bfe6fSHong Zhang } 712d20bfe6fSHong Zhang 713d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 714d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 715d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 716d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 717d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 718d20bfe6fSHong Zhang 719eb9c0419SKris Buschelman PetscFunctionReturn(0); 720eb9c0419SKris Buschelman } 7210e36024fSHong Zhang 722*a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */ 723e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0; 724e240928fSHong Zhang #undef __FUNCT__ 725e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local" 726e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C) 727e240928fSHong Zhang { 728e240928fSHong Zhang PetscErrorCode ierr; 729e240928fSHong Zhang PetscInt flops=0; 730e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 731e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; 732e240928fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 733e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 734e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 735e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col; 736e240928fSHong Zhang PetscInt *pJ=pj_loc,*pjj; 737e240928fSHong Zhang PetscInt *ci=c->i,*cj=c->j,*cjj; 738e240928fSHong Zhang PetscInt am=A->m,cn=C->N,cm=C->M; 739e240928fSHong Zhang PetscInt i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 740e240928fSHong Zhang MatScalar *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj; 741e240928fSHong Zhang MatScalar *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a; 742e240928fSHong Zhang 743e240928fSHong Zhang PetscFunctionBegin; 744e240928fSHong Zhang if (!logkey_matptapnumeric_local) { 745*a61c8c0fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr); 746e240928fSHong Zhang } 747*a61c8c0fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 748e240928fSHong Zhang 749e240928fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 750e240928fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 751e240928fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 752e240928fSHong Zhang apj = (PetscInt *)(apa + cn); 753e240928fSHong Zhang apjdense = apj + cn; 754e240928fSHong Zhang 755e240928fSHong Zhang /* Clear old values in C */ 756e240928fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 757e240928fSHong Zhang 758e240928fSHong Zhang for (i=0;i<am;i++) { 759e240928fSHong Zhang /* Form i-th sparse row of A*P */ 760e240928fSHong Zhang apnzj = 0; 761e240928fSHong Zhang /* diagonal portion of A */ 762e240928fSHong Zhang anzi = adi[i+1] - adi[i]; 763e240928fSHong Zhang for (j=0;j<anzi;j++) { 764e240928fSHong Zhang prow = *adj; 765e240928fSHong Zhang adj++; 766e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 767e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 768e240928fSHong Zhang paj = pa_loc + pi_loc[prow]; 769e240928fSHong Zhang for (k=0;k<pnzj;k++) { 770e240928fSHong Zhang if (!apjdense[pjj[k]]) { 771e240928fSHong Zhang apjdense[pjj[k]] = -1; 772e240928fSHong Zhang apj[apnzj++] = pjj[k]; 773e240928fSHong Zhang } 774e240928fSHong Zhang apa[pjj[k]] += (*ada)*paj[k]; 775e240928fSHong Zhang } 776e240928fSHong Zhang flops += 2*pnzj; 777e240928fSHong Zhang ada++; 778e240928fSHong Zhang } 779e240928fSHong Zhang /* off-diagonal portion of A */ 780e240928fSHong Zhang anzi = aoi[i+1] - aoi[i]; 781e240928fSHong Zhang for (j=0;j<anzi;j++) { 782e240928fSHong Zhang col = a->garray[*aoj]; 783e240928fSHong Zhang if (col < cstart){ 784e240928fSHong Zhang prow = *aoj; 785e240928fSHong Zhang } else if (col >= cend){ 786e240928fSHong Zhang prow = *aoj; 787e240928fSHong Zhang } else { 788e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 789e240928fSHong Zhang } 790e240928fSHong Zhang aoj++; 791e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 792e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 793e240928fSHong Zhang paj = pa_oth + pi_oth[prow]; 794e240928fSHong Zhang for (k=0;k<pnzj;k++) { 795e240928fSHong Zhang if (!apjdense[pjj[k]]) { 796e240928fSHong Zhang apjdense[pjj[k]] = -1; 797e240928fSHong Zhang apj[apnzj++] = pjj[k]; 798e240928fSHong Zhang } 799e240928fSHong Zhang apa[pjj[k]] += (*aoa)*paj[k]; 800e240928fSHong Zhang } 801e240928fSHong Zhang flops += 2*pnzj; 802e240928fSHong Zhang aoa++; 803e240928fSHong Zhang } 804e240928fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 805e240928fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 806e240928fSHong Zhang 807e240928fSHong Zhang /* Compute P_loc[i,:]^T*AP[i,:] using outer product */ 808e240928fSHong Zhang pnzi = pi_loc[i+1] - pi_loc[i]; 809e240928fSHong Zhang for (j=0;j<pnzi;j++) { 810e240928fSHong Zhang nextap = 0; 811e240928fSHong Zhang crow = *pJ++; 812e240928fSHong Zhang cjj = cj + ci[crow]; 813e240928fSHong Zhang caj = ca + ci[crow]; 814e240928fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 815e240928fSHong Zhang for (k=0;nextap<apnzj;k++) { 816e240928fSHong Zhang if (cjj[k]==apj[nextap]) { 817e240928fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 818e240928fSHong Zhang } 819e240928fSHong Zhang } 820e240928fSHong Zhang flops += 2*apnzj; 821e240928fSHong Zhang pA++; 822e240928fSHong Zhang } 823e240928fSHong Zhang 824e240928fSHong Zhang /* Zero the current row info for A*P */ 825e240928fSHong Zhang for (j=0;j<apnzj;j++) { 826e240928fSHong Zhang apa[apj[j]] = 0.; 827e240928fSHong Zhang apjdense[apj[j]] = 0; 828e240928fSHong Zhang } 829e240928fSHong Zhang } 830e240928fSHong Zhang 831e240928fSHong Zhang /* Assemble the final matrix and clean up */ 832e240928fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833e240928fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834e240928fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 835e240928fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 836*a61c8c0fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr); 837e240928fSHong Zhang PetscFunctionReturn(0); 838e240928fSHong Zhang } 839e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0; 840e240928fSHong Zhang #undef __FUNCT__ 841e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local" 842e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C) 843e240928fSHong Zhang { 844e240928fSHong Zhang PetscErrorCode ierr; 845e240928fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 846e240928fSHong Zhang Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 847e240928fSHong Zhang Mat_SeqAIJ *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c; 848e240928fSHong Zhang PetscInt *pti,*ptj,*ptJ,*ajj,*pjj; 849e240928fSHong Zhang PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col; 850e240928fSHong Zhang PetscInt *ci,*cj,*ptaj; 851e240928fSHong Zhang PetscInt an=A->N,am=A->m,pN=P_loc->N; 852e240928fSHong Zhang PetscInt i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi; 853e240928fSHong Zhang PetscInt pm=P_loc->m,nlnk,*lnk; 854e240928fSHong Zhang MatScalar *ca; 855e240928fSHong Zhang PetscBT lnkbt; 856e240928fSHong Zhang PetscInt prend,nprow_loc,nprow_oth; 857e240928fSHong Zhang PetscInt *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth; 858e240928fSHong Zhang Mat_SeqAIJ *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data; 859e240928fSHong Zhang PetscInt *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j; 860e240928fSHong Zhang 861e240928fSHong Zhang PetscFunctionBegin; 862e240928fSHong Zhang if (!logkey_matptapsymbolic_local) { 863e240928fSHong Zhang ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE); 864e240928fSHong Zhang } 865e240928fSHong Zhang ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 866e240928fSHong Zhang 867e240928fSHong Zhang prend = prstart + pm; 868e240928fSHong Zhang 869e240928fSHong Zhang /* get ij structure of P_loc^T */ 870e240928fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 871e240928fSHong Zhang ptJ=ptj; 872e240928fSHong Zhang 873e240928fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 874e240928fSHong Zhang /* free space for accumulating nonzero column info */ 875e240928fSHong Zhang ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 876e240928fSHong Zhang ci[0] = 0; 877e240928fSHong Zhang 878e240928fSHong Zhang ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr); 879e240928fSHong Zhang ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 880e240928fSHong Zhang ptasparserow_loc = ptadenserow_loc + an; 881e240928fSHong Zhang ptadenserow_oth = ptasparserow_loc + an; 882e240928fSHong Zhang ptasparserow_oth = ptadenserow_oth + an; 883e240928fSHong Zhang 884e240928fSHong Zhang /* create and initialize a linked list */ 885e240928fSHong Zhang nlnk = pN+1; 886e240928fSHong Zhang ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 887e240928fSHong Zhang 888e240928fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */ 889e240928fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 890e240928fSHong Zhang nnz = adi[am] + aoi[am]; 891e240928fSHong Zhang ierr = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space); 892e240928fSHong Zhang current_space = free_space; 893e240928fSHong Zhang 894e240928fSHong Zhang /* determine symbolic info for each row of C: */ 895e240928fSHong Zhang for (i=0;i<pN;i++) { 896e240928fSHong Zhang ptnzi = pti[i+1] - pti[i]; 897e240928fSHong Zhang nprow_loc = 0; nprow_oth = 0; 898e240928fSHong Zhang /* i-th row of symbolic P_loc^T*A_loc: */ 899e240928fSHong Zhang for (j=0;j<ptnzi;j++) { 900e240928fSHong Zhang arow = *ptJ++; 901e240928fSHong Zhang /* diagonal portion of A */ 902e240928fSHong Zhang anzj = adi[arow+1] - adi[arow]; 903e240928fSHong Zhang ajj = adj + adi[arow]; 904e240928fSHong Zhang for (k=0;k<anzj;k++) { 905e240928fSHong Zhang col = ajj[k]+prstart; 906e240928fSHong Zhang if (!ptadenserow_loc[col]) { 907e240928fSHong Zhang ptadenserow_loc[col] = -1; 908e240928fSHong Zhang ptasparserow_loc[nprow_loc++] = col; 909e240928fSHong Zhang } 910e240928fSHong Zhang } 911e240928fSHong Zhang /* off-diagonal portion of A */ 912e240928fSHong Zhang anzj = aoi[arow+1] - aoi[arow]; 913e240928fSHong Zhang ajj = aoj + aoi[arow]; 914e240928fSHong Zhang for (k=0;k<anzj;k++) { 915e240928fSHong Zhang col = a->garray[ajj[k]]; /* global col */ 916e240928fSHong Zhang if (col < cstart){ 917e240928fSHong Zhang col = ajj[k]; 918e240928fSHong Zhang } else if (col >= cend){ 919e240928fSHong Zhang col = ajj[k] + pm; 920e240928fSHong Zhang } else { 921e240928fSHong Zhang SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map"); 922e240928fSHong Zhang } 923e240928fSHong Zhang if (!ptadenserow_oth[col]) { 924e240928fSHong Zhang ptadenserow_oth[col] = -1; 925e240928fSHong Zhang ptasparserow_oth[nprow_oth++] = col; 926e240928fSHong Zhang } 927e240928fSHong Zhang } 928e240928fSHong Zhang } 929e240928fSHong Zhang 930e240928fSHong Zhang /* using symbolic info of local PtA, determine symbolic info for row of C: */ 931e240928fSHong Zhang cnzi = 0; 932e240928fSHong Zhang /* rows of P_loc */ 933e240928fSHong Zhang ptaj = ptasparserow_loc; 934e240928fSHong Zhang for (j=0; j<nprow_loc; j++) { 935e240928fSHong Zhang prow = *ptaj++; 936e240928fSHong Zhang prow -= prstart; /* rm */ 937e240928fSHong Zhang pnzj = pi_loc[prow+1] - pi_loc[prow]; 938e240928fSHong Zhang pjj = pj_loc + pi_loc[prow]; 939e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 940e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 941e240928fSHong Zhang cnzi += nlnk; 942e240928fSHong Zhang } 943e240928fSHong Zhang /* rows of P_oth */ 944e240928fSHong Zhang ptaj = ptasparserow_oth; 945e240928fSHong Zhang for (j=0; j<nprow_oth; j++) { 946e240928fSHong Zhang prow = *ptaj++; 947e240928fSHong Zhang if (prow >= prend) prow -= pm; /* rm */ 948e240928fSHong Zhang pnzj = pi_oth[prow+1] - pi_oth[prow]; 949e240928fSHong Zhang pjj = pj_oth + pi_oth[prow]; 950e240928fSHong Zhang /* add non-zero cols of P into the sorted linked list lnk */ 951e240928fSHong Zhang ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr); 952e240928fSHong Zhang cnzi += nlnk; 953e240928fSHong Zhang } 954e240928fSHong Zhang 955e240928fSHong Zhang /* If free space is not available, make more free space */ 956e240928fSHong Zhang /* Double the amount of total space in the list */ 957e240928fSHong Zhang if (current_space->local_remaining<cnzi) { 958e240928fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 959e240928fSHong Zhang } 960e240928fSHong Zhang 961e240928fSHong Zhang /* Copy data into free space, and zero out denserows */ 962e240928fSHong Zhang ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 963e240928fSHong Zhang current_space->array += cnzi; 964e240928fSHong Zhang current_space->local_used += cnzi; 965e240928fSHong Zhang current_space->local_remaining -= cnzi; 966e240928fSHong Zhang 967e240928fSHong Zhang for (j=0;j<nprow_loc; j++) { 968e240928fSHong Zhang ptadenserow_loc[ptasparserow_loc[j]] = 0; 969e240928fSHong Zhang } 970e240928fSHong Zhang for (j=0;j<nprow_oth; j++) { 971e240928fSHong Zhang ptadenserow_oth[ptasparserow_oth[j]] = 0; 972e240928fSHong Zhang } 973e240928fSHong Zhang 974e240928fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 975e240928fSHong Zhang /* For now, we will recompute what is needed. */ 976e240928fSHong Zhang ci[i+1] = ci[i] + cnzi; 977e240928fSHong Zhang } 978e240928fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 979e240928fSHong Zhang /* Allocate space for cj, initialize cj, and */ 980e240928fSHong Zhang /* destroy list of free space and other temporary array(s) */ 981e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 982e240928fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 983e240928fSHong Zhang ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr); 984e240928fSHong Zhang ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 985e240928fSHong Zhang 986e240928fSHong Zhang /* Allocate space for ca */ 987e240928fSHong Zhang ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 988e240928fSHong Zhang ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr); 989e240928fSHong Zhang 990e240928fSHong Zhang /* put together the new matrix */ 991e240928fSHong Zhang ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr); 992e240928fSHong Zhang 993e240928fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 994e240928fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 995e240928fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 996e240928fSHong Zhang c->freedata = PETSC_TRUE; 997e240928fSHong Zhang c->nonew = 0; 998e240928fSHong Zhang 999e240928fSHong Zhang /* Clean up. */ 1000e240928fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr); 1001*a61c8c0fSHong Zhang 1002e240928fSHong Zhang ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr); 1003e240928fSHong Zhang PetscFunctionReturn(0); 1004e240928fSHong Zhang } 1005