1eb9c0419SKris Buschelman /* 29af31e4aSHong Zhang Defines projective product routines where A is a AIJ matrix 3eb9c0419SKris Buschelman C = P^T * A * P 4eb9c0419SKris Buschelman */ 5eb9c0419SKris Buschelman 6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h" 89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h" 9eb9c0419SKris Buschelman 10eb9c0419SKris Buschelman #undef __FUNCT__ 119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP" 124d3841fdSKris Buschelman /*@ 139af31e4aSHong Zhang MatPtAP - Creates the matrix projection C = P^T * A * P 144d3841fdSKris Buschelman 154d3841fdSKris Buschelman Collective on Mat 164d3841fdSKris Buschelman 174d3841fdSKris Buschelman Input Parameters: 184d3841fdSKris Buschelman + A - the matrix 19f747e1aeSHong Zhang . P - the projection matrix 20f747e1aeSHong Zhang . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 21f747e1aeSHong Zhang - fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P)) 224d3841fdSKris Buschelman 234d3841fdSKris Buschelman Output Parameters: 244d3841fdSKris Buschelman . C - the product matrix 254d3841fdSKris Buschelman 264d3841fdSKris Buschelman Notes: 274d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 284d3841fdSKris Buschelman 294d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 304d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 314d3841fdSKris Buschelman 324d3841fdSKris Buschelman Level: intermediate 334d3841fdSKris Buschelman 349af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult() 354d3841fdSKris Buschelman @*/ 36dfbe8321SBarry Smith PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) { 37dfbe8321SBarry Smith PetscErrorCode ierr; 38534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *); 39534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *); 40eb9c0419SKris Buschelman 41eb9c0419SKris Buschelman PetscFunctionBegin; 429af31e4aSHong Zhang PetscValidHeaderSpecific(A,MAT_COOKIE,1); 439af31e4aSHong Zhang PetscValidType(A,1); 449af31e4aSHong Zhang MatPreallocated(A); 459af31e4aSHong Zhang if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 469af31e4aSHong Zhang if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 479af31e4aSHong Zhang PetscValidHeaderSpecific(P,MAT_COOKIE,2); 489af31e4aSHong Zhang PetscValidType(P,2); 499af31e4aSHong Zhang MatPreallocated(P); 509af31e4aSHong Zhang if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 519af31e4aSHong Zhang if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 529af31e4aSHong Zhang PetscValidPointer(C,3); 53534c1384SKris Buschelman 549af31e4aSHong Zhang if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 55eb9c0419SKris Buschelman 569af31e4aSHong Zhang if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill); 57eb9c0419SKris Buschelman 58534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 59534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 60534c1384SKris Buschelman fA = A->ops->ptap; 61534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name); 62534c1384SKris Buschelman fP = P->ops->ptap; 63534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name); 64534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 65534c1384SKris Buschelman 669af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 67534c1384SKris Buschelman ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr); 689af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr); 69eb9c0419SKris Buschelman PetscFunctionReturn(0); 70eb9c0419SKris Buschelman } 71eb9c0419SKris Buschelman 72*25616d81SHong Zhang EXTERN PetscErrorCode MatGetLocalMat(Mat,MatReuse,IS*,IS*,Mat*); 73*25616d81SHong Zhang EXTERN PetscErrorCode MatGetBrowsOfAcols(Mat,Mat,MatReuse,IS*,IS*,PetscInt*,Mat*); 740e36024fSHong Zhang EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*); 750e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*); 760e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat); 77eb9c0419SKris Buschelman #undef __FUNCT__ 78ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ" 79ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 80ff134f7aSHong Zhang { 81ff134f7aSHong Zhang PetscErrorCode ierr; 82*25616d81SHong Zhang Mat P_seq,A_loc,C_seq; 830e36024fSHong Zhang int prstart,prend,m=P->m; 84*25616d81SHong Zhang IS isrowp,iscolp; 85ff134f7aSHong Zhang 86ff134f7aSHong Zhang PetscFunctionBegin; 87*25616d81SHong Zhang /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */ 88*25616d81SHong Zhang ierr = MatGetBrowsOfAcols(A,P,scall,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr); 89*25616d81SHong Zhang ierr = ISDestroy(iscolp);CHKERRQ(ierr); 90ff134f7aSHong Zhang 91*25616d81SHong Zhang /* get A_loc = submatrix of A by taking all local rows of A */ 92*25616d81SHong Zhang ierr = MatGetLocalMat(A,scall,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr); 93*25616d81SHong Zhang ierr = ISDestroy(isrowp);CHKERRQ(ierr); 940e36024fSHong Zhang 95*25616d81SHong Zhang /* compute C_seq = P_loc^T * A_loc * P_seq */ 96ff134f7aSHong Zhang prend = prstart + m; 9720d4747cSHong Zhang ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr); 980e36024fSHong Zhang 99*25616d81SHong Zhang /* add C_seq into mpi C */ 1000e36024fSHong Zhang ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr); 1010e36024fSHong Zhang 1020e36024fSHong Zhang /* clean up */ 1030e36024fSHong Zhang ierr = MatDestroy(C_seq);CHKERRQ(ierr); 104*25616d81SHong Zhang ierr = MatDestroy(P_seq);CHKERRQ(ierr); 105*25616d81SHong Zhang ierr = MatDestroy(A_loc);CHKERRQ(ierr); 106ff134f7aSHong Zhang PetscFunctionReturn(0); 107ff134f7aSHong Zhang } 108ff134f7aSHong Zhang 109ff134f7aSHong Zhang #undef __FUNCT__ 110ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 111ff134f7aSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 112ff134f7aSHong Zhang { 113ff134f7aSHong Zhang 114ff134f7aSHong Zhang PetscFunctionBegin; 115ff134f7aSHong Zhang PetscFunctionReturn(0); 116ff134f7aSHong Zhang } 117ff134f7aSHong Zhang 118ff134f7aSHong Zhang #undef __FUNCT__ 119ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ" 120ff134f7aSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C) 121ff134f7aSHong Zhang { 122ff134f7aSHong Zhang 123ff134f7aSHong Zhang PetscFunctionBegin; 124ff134f7aSHong Zhang PetscFunctionReturn(0); 125ff134f7aSHong Zhang } 126ff134f7aSHong Zhang 127ff134f7aSHong Zhang #undef __FUNCT__ 1289af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ" 129dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) 1309af31e4aSHong Zhang { 131dfbe8321SBarry Smith PetscErrorCode ierr; 1329af31e4aSHong Zhang PetscFunctionBegin; 1339af31e4aSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 134d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1359af31e4aSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr); 136d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 1379af31e4aSHong Zhang } 138d20bfe6fSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1399af31e4aSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr); 140d20bfe6fSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 1419af31e4aSHong Zhang PetscFunctionReturn(0); 1429af31e4aSHong Zhang } 1439af31e4aSHong Zhang 1449af31e4aSHong Zhang #undef __FUNCT__ 1459af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic" 1466849ba73SBarry Smith /* 1479af31e4aSHong Zhang MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P 1484d3841fdSKris Buschelman 1494d3841fdSKris Buschelman Collective on Mat 1504d3841fdSKris Buschelman 1514d3841fdSKris Buschelman Input Parameters: 1524d3841fdSKris Buschelman + A - the matrix 1534d3841fdSKris Buschelman - P - the projection matrix 1544d3841fdSKris Buschelman 1554d3841fdSKris Buschelman Output Parameters: 1564d3841fdSKris Buschelman . C - the (i,j) structure of the product matrix 1574d3841fdSKris Buschelman 1584d3841fdSKris Buschelman Notes: 1594d3841fdSKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 1604d3841fdSKris Buschelman 1614d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 1624d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. The product is computed using 1639af31e4aSHong Zhang this (i,j) structure by calling MatPtAPNumeric(). 1644d3841fdSKris Buschelman 1654d3841fdSKris Buschelman Level: intermediate 1664d3841fdSKris Buschelman 1679af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic() 1686849ba73SBarry Smith */ 169dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) { 170dfbe8321SBarry Smith PetscErrorCode ierr; 171534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*); 172534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*); 173eb9c0419SKris Buschelman 174eb9c0419SKris Buschelman PetscFunctionBegin; 175eb9c0419SKris Buschelman 1764482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 177c9780b6fSBarry Smith PetscValidType(A,1); 178eb9c0419SKris Buschelman MatPreallocated(A); 179eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 180eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 181eb9c0419SKris Buschelman 1824482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 183c9780b6fSBarry Smith PetscValidType(P,2); 184eb9c0419SKris Buschelman MatPreallocated(P); 185eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 186eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 187eb9c0419SKris Buschelman 1884482741eSBarry Smith PetscValidPointer(C,3); 1894482741eSBarry Smith 190eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 191eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 192eb9c0419SKris Buschelman 193534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 194534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 195534c1384SKris Buschelman fA = A->ops->ptapsymbolic; 196534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name); 197534c1384SKris Buschelman fP = P->ops->ptapsymbolic; 198534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name); 199534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 2004d3841fdSKris Buschelman 201534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 202534c1384SKris Buschelman ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr); 203534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr); 204eb9c0419SKris Buschelman 205eb9c0419SKris Buschelman PetscFunctionReturn(0); 206eb9c0419SKris Buschelman } 207eb9c0419SKris Buschelman 208f747e1aeSHong Zhang typedef struct { 209f747e1aeSHong Zhang Mat symAP; 210f747e1aeSHong Zhang } Mat_PtAPstruct; 211f747e1aeSHong Zhang 21278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat); 21378a80504SBarry Smith 214f747e1aeSHong Zhang #undef __FUNCT__ 215f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP" 216f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A) 217f747e1aeSHong Zhang { 218f4a850bbSBarry Smith PetscErrorCode ierr; 219f747e1aeSHong Zhang Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)A->spptr; 220f747e1aeSHong Zhang 221f747e1aeSHong Zhang PetscFunctionBegin; 222f747e1aeSHong Zhang ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr); 223f747e1aeSHong Zhang ierr = PetscFree(ptap);CHKERRQ(ierr); 22478a80504SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 225f747e1aeSHong Zhang PetscFunctionReturn(0); 226f747e1aeSHong Zhang } 227f747e1aeSHong Zhang 228eb9c0419SKris Buschelman #undef __FUNCT__ 2299af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ" 230dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) { 231dfbe8321SBarry Smith PetscErrorCode ierr; 232d20bfe6fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 233d20bfe6fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 234d20bfe6fSHong Zhang int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 235d20bfe6fSHong Zhang int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 236d20bfe6fSHong Zhang int an=A->N,am=A->M,pn=P->N,pm=P->M; 237d20bfe6fSHong Zhang int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 238d20bfe6fSHong Zhang MatScalar *ca; 239eb9c0419SKris Buschelman 240eb9c0419SKris Buschelman PetscFunctionBegin; 241d20bfe6fSHong Zhang /* Get ij structure of P^T */ 242eb9c0419SKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 243d20bfe6fSHong Zhang ptJ=ptj; 244eb9c0419SKris Buschelman 245d20bfe6fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 246d20bfe6fSHong Zhang /* free space for accumulating nonzero column info */ 247d20bfe6fSHong Zhang ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 248d20bfe6fSHong Zhang ci[0] = 0; 249eb9c0419SKris Buschelman 250d20bfe6fSHong Zhang ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 251d20bfe6fSHong Zhang ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 252d20bfe6fSHong Zhang ptasparserow = ptadenserow + an; 253d20bfe6fSHong Zhang denserow = ptasparserow + an; 254d20bfe6fSHong Zhang sparserow = denserow + pn; 255eb9c0419SKris Buschelman 256d20bfe6fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 257d20bfe6fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 258d20bfe6fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 259d20bfe6fSHong Zhang current_space = free_space; 260d20bfe6fSHong Zhang 261d20bfe6fSHong Zhang /* Determine symbolic info for each row of C: */ 262d20bfe6fSHong Zhang for (i=0;i<pn;i++) { 263d20bfe6fSHong Zhang ptnzi = pti[i+1] - pti[i]; 264d20bfe6fSHong Zhang ptanzi = 0; 265d20bfe6fSHong Zhang /* Determine symbolic row of PtA: */ 266d20bfe6fSHong Zhang for (j=0;j<ptnzi;j++) { 267d20bfe6fSHong Zhang arow = *ptJ++; 268d20bfe6fSHong Zhang anzj = ai[arow+1] - ai[arow]; 269d20bfe6fSHong Zhang ajj = aj + ai[arow]; 270d20bfe6fSHong Zhang for (k=0;k<anzj;k++) { 271d20bfe6fSHong Zhang if (!ptadenserow[ajj[k]]) { 272d20bfe6fSHong Zhang ptadenserow[ajj[k]] = -1; 273d20bfe6fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 274d20bfe6fSHong Zhang } 275d20bfe6fSHong Zhang } 276d20bfe6fSHong Zhang } 277d20bfe6fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 278d20bfe6fSHong Zhang ptaj = ptasparserow; 279d20bfe6fSHong Zhang cnzi = 0; 280d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 281d20bfe6fSHong Zhang prow = *ptaj++; 282d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 283d20bfe6fSHong Zhang pjj = pj + pi[prow]; 284d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 285d20bfe6fSHong Zhang if (!denserow[pjj[k]]) { 286d20bfe6fSHong Zhang denserow[pjj[k]] = -1; 287d20bfe6fSHong Zhang sparserow[cnzi++] = pjj[k]; 288d20bfe6fSHong Zhang } 289d20bfe6fSHong Zhang } 290d20bfe6fSHong Zhang } 291d20bfe6fSHong Zhang 292d20bfe6fSHong Zhang /* sort sparserow */ 293d20bfe6fSHong Zhang ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 294d20bfe6fSHong Zhang 295d20bfe6fSHong Zhang /* If free space is not available, make more free space */ 296d20bfe6fSHong Zhang /* Double the amount of total space in the list */ 297d20bfe6fSHong Zhang if (current_space->local_remaining<cnzi) { 298d20bfe6fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 299d20bfe6fSHong Zhang } 300d20bfe6fSHong Zhang 301d20bfe6fSHong Zhang /* Copy data into free space, and zero out denserows */ 302d20bfe6fSHong Zhang ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 303d20bfe6fSHong Zhang current_space->array += cnzi; 304d20bfe6fSHong Zhang current_space->local_used += cnzi; 305d20bfe6fSHong Zhang current_space->local_remaining -= cnzi; 306d20bfe6fSHong Zhang 307d20bfe6fSHong Zhang for (j=0;j<ptanzi;j++) { 308d20bfe6fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 309d20bfe6fSHong Zhang } 310d20bfe6fSHong Zhang for (j=0;j<cnzi;j++) { 311d20bfe6fSHong Zhang denserow[sparserow[j]] = 0; 312d20bfe6fSHong Zhang } 313d20bfe6fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 314d20bfe6fSHong Zhang /* For now, we will recompute what is needed. */ 315d20bfe6fSHong Zhang ci[i+1] = ci[i] + cnzi; 316d20bfe6fSHong Zhang } 317d20bfe6fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 318d20bfe6fSHong Zhang /* Allocate space for cj, initialize cj, and */ 319d20bfe6fSHong Zhang /* destroy list of free space and other temporary array(s) */ 320d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 321d20bfe6fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 322d20bfe6fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 323d20bfe6fSHong Zhang 324d20bfe6fSHong Zhang /* Allocate space for ca */ 325d20bfe6fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 326d20bfe6fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 327d20bfe6fSHong Zhang 328d20bfe6fSHong Zhang /* put together the new matrix */ 329d20bfe6fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 330d20bfe6fSHong Zhang 331d20bfe6fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 332d20bfe6fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 333d20bfe6fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 334d20bfe6fSHong Zhang c->freedata = PETSC_TRUE; 335d20bfe6fSHong Zhang c->nonew = 0; 336d20bfe6fSHong Zhang 337d20bfe6fSHong Zhang /* Clean up. */ 338d20bfe6fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 339eb9c0419SKris Buschelman 340eb9c0419SKris Buschelman PetscFunctionReturn(0); 341eb9c0419SKris Buschelman } 342eb9c0419SKris Buschelman 3433985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h" 3443985e5eaSKris Buschelman EXTERN_C_BEGIN 3453985e5eaSKris Buschelman #undef __FUNCT__ 3469af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 347dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) { 3485c66b693SKris Buschelman /* This routine requires testing -- I don't think it works. */ 349dfbe8321SBarry Smith PetscErrorCode ierr; 3503985e5eaSKris Buschelman FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3513985e5eaSKris Buschelman Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 3523985e5eaSKris Buschelman Mat P=pp->AIJ; 3533985e5eaSKris Buschelman Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 3543985e5eaSKris Buschelman int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 3553985e5eaSKris Buschelman int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 3563985e5eaSKris Buschelman int an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof; 357fe05a634SKris Buschelman int i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 3583985e5eaSKris Buschelman MatScalar *ca; 3593985e5eaSKris Buschelman 3603985e5eaSKris Buschelman PetscFunctionBegin; 3613985e5eaSKris Buschelman /* Start timer */ 3629af31e4aSHong Zhang ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 3633985e5eaSKris Buschelman 3643985e5eaSKris Buschelman /* Get ij structure of P^T */ 3653985e5eaSKris Buschelman ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 3663985e5eaSKris Buschelman 3673985e5eaSKris Buschelman /* Allocate ci array, arrays for fill computation and */ 3683985e5eaSKris Buschelman /* free space for accumulating nonzero column info */ 3693985e5eaSKris Buschelman ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 3703985e5eaSKris Buschelman ci[0] = 0; 3713985e5eaSKris Buschelman 3723985e5eaSKris Buschelman ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 3733985e5eaSKris Buschelman ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 3743985e5eaSKris Buschelman ptasparserow = ptadenserow + an; 3753985e5eaSKris Buschelman denserow = ptasparserow + an; 3763985e5eaSKris Buschelman sparserow = denserow + pn; 3773985e5eaSKris Buschelman 3783985e5eaSKris Buschelman /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 3793985e5eaSKris Buschelman /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 3803985e5eaSKris Buschelman ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 3813985e5eaSKris Buschelman current_space = free_space; 3823985e5eaSKris Buschelman 3833985e5eaSKris Buschelman /* Determine symbolic info for each row of C: */ 3843985e5eaSKris Buschelman for (i=0;i<pn/ppdof;i++) { 3853985e5eaSKris Buschelman ptnzi = pti[i+1] - pti[i]; 3863985e5eaSKris Buschelman ptanzi = 0; 3873985e5eaSKris Buschelman ptJ = ptj + pti[i]; 3883985e5eaSKris Buschelman for (dof=0;dof<ppdof;dof++) { 3893985e5eaSKris Buschelman /* Determine symbolic row of PtA: */ 3903985e5eaSKris Buschelman for (j=0;j<ptnzi;j++) { 3913985e5eaSKris Buschelman arow = ptJ[j] + dof; 3923985e5eaSKris Buschelman anzj = ai[arow+1] - ai[arow]; 3933985e5eaSKris Buschelman ajj = aj + ai[arow]; 3943985e5eaSKris Buschelman for (k=0;k<anzj;k++) { 3953985e5eaSKris Buschelman if (!ptadenserow[ajj[k]]) { 3963985e5eaSKris Buschelman ptadenserow[ajj[k]] = -1; 3973985e5eaSKris Buschelman ptasparserow[ptanzi++] = ajj[k]; 3983985e5eaSKris Buschelman } 3993985e5eaSKris Buschelman } 4003985e5eaSKris Buschelman } 4013985e5eaSKris Buschelman /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 4023985e5eaSKris Buschelman ptaj = ptasparserow; 4033985e5eaSKris Buschelman cnzi = 0; 4043985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 405fe05a634SKris Buschelman pdof = *ptaj%dof; 4063985e5eaSKris Buschelman prow = (*ptaj++)/dof; 4073985e5eaSKris Buschelman pnzj = pi[prow+1] - pi[prow]; 4083985e5eaSKris Buschelman pjj = pj + pi[prow]; 4093985e5eaSKris Buschelman for (k=0;k<pnzj;k++) { 410fe05a634SKris Buschelman if (!denserow[pjj[k]+pdof]) { 411fe05a634SKris Buschelman denserow[pjj[k]+pdof] = -1; 412fe05a634SKris Buschelman sparserow[cnzi++] = pjj[k]+pdof; 4133985e5eaSKris Buschelman } 4143985e5eaSKris Buschelman } 4153985e5eaSKris Buschelman } 4163985e5eaSKris Buschelman 4173985e5eaSKris Buschelman /* sort sparserow */ 4183985e5eaSKris Buschelman ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 4193985e5eaSKris Buschelman 4203985e5eaSKris Buschelman /* If free space is not available, make more free space */ 4213985e5eaSKris Buschelman /* Double the amount of total space in the list */ 4223985e5eaSKris Buschelman if (current_space->local_remaining<cnzi) { 4233985e5eaSKris Buschelman ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4243985e5eaSKris Buschelman } 4253985e5eaSKris Buschelman 4263985e5eaSKris Buschelman /* Copy data into free space, and zero out denserows */ 4273985e5eaSKris Buschelman ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 4283985e5eaSKris Buschelman current_space->array += cnzi; 4293985e5eaSKris Buschelman current_space->local_used += cnzi; 4303985e5eaSKris Buschelman current_space->local_remaining -= cnzi; 4313985e5eaSKris Buschelman 4323985e5eaSKris Buschelman for (j=0;j<ptanzi;j++) { 4333985e5eaSKris Buschelman ptadenserow[ptasparserow[j]] = 0; 4343985e5eaSKris Buschelman } 4353985e5eaSKris Buschelman for (j=0;j<cnzi;j++) { 4363985e5eaSKris Buschelman denserow[sparserow[j]] = 0; 4373985e5eaSKris Buschelman } 4383985e5eaSKris Buschelman /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 4393985e5eaSKris Buschelman /* For now, we will recompute what is needed. */ 4403985e5eaSKris Buschelman ci[i+1+dof] = ci[i+dof] + cnzi; 4413985e5eaSKris Buschelman } 4423985e5eaSKris Buschelman } 4433985e5eaSKris Buschelman /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 4443985e5eaSKris Buschelman /* Allocate space for cj, initialize cj, and */ 4453985e5eaSKris Buschelman /* destroy list of free space and other temporary array(s) */ 4463985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 4473985e5eaSKris Buschelman ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 4483985e5eaSKris Buschelman ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 4493985e5eaSKris Buschelman 4503985e5eaSKris Buschelman /* Allocate space for ca */ 4513985e5eaSKris Buschelman ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 4523985e5eaSKris Buschelman ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 4533985e5eaSKris Buschelman 4543985e5eaSKris Buschelman /* put together the new matrix */ 4553985e5eaSKris Buschelman ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 4563985e5eaSKris Buschelman 4573985e5eaSKris Buschelman /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4583985e5eaSKris Buschelman /* Since these are PETSc arrays, change flags to free them as necessary. */ 4593985e5eaSKris Buschelman c = (Mat_SeqAIJ *)((*C)->data); 4603985e5eaSKris Buschelman c->freedata = PETSC_TRUE; 4613985e5eaSKris Buschelman c->nonew = 0; 4623985e5eaSKris Buschelman 4633985e5eaSKris Buschelman /* Clean up. */ 4643985e5eaSKris Buschelman ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 4653985e5eaSKris Buschelman 4669af31e4aSHong Zhang ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 4673985e5eaSKris Buschelman PetscFunctionReturn(0); 4683985e5eaSKris Buschelman } 4693985e5eaSKris Buschelman EXTERN_C_END 4703985e5eaSKris Buschelman 471eb9c0419SKris Buschelman #undef __FUNCT__ 4729af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric" 4736849ba73SBarry Smith /* 4749af31e4aSHong Zhang MatPtAPNumeric - Computes the matrix projection C = P^T * A * P 4754d3841fdSKris Buschelman 4764d3841fdSKris Buschelman Collective on Mat 4774d3841fdSKris Buschelman 4784d3841fdSKris Buschelman Input Parameters: 4794d3841fdSKris Buschelman + A - the matrix 4804d3841fdSKris Buschelman - P - the projection matrix 4814d3841fdSKris Buschelman 4824d3841fdSKris Buschelman Output Parameters: 4834d3841fdSKris Buschelman . C - the product matrix 4844d3841fdSKris Buschelman 4854d3841fdSKris Buschelman Notes: 4869af31e4aSHong Zhang C must have been created by calling MatPtAPSymbolic and must be destroyed by 4874d3841fdSKris Buschelman the user using MatDeatroy(). 4884d3841fdSKris Buschelman 489170ef064SHong Zhang This routine is currently only implemented for pairs of AIJ matrices and classes 490170ef064SHong Zhang which inherit from AIJ. C will be of type MATAIJ. 4914d3841fdSKris Buschelman 4924d3841fdSKris Buschelman Level: intermediate 4934d3841fdSKris Buschelman 4949af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric() 4956849ba73SBarry Smith */ 496dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) { 497dfbe8321SBarry Smith PetscErrorCode ierr; 498534c1384SKris Buschelman PetscErrorCode (*fA)(Mat,Mat,Mat); 499534c1384SKris Buschelman PetscErrorCode (*fP)(Mat,Mat,Mat); 500eb9c0419SKris Buschelman 501eb9c0419SKris Buschelman PetscFunctionBegin; 502eb9c0419SKris Buschelman 5034482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 504c9780b6fSBarry Smith PetscValidType(A,1); 505eb9c0419SKris Buschelman MatPreallocated(A); 506eb9c0419SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 507eb9c0419SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 508eb9c0419SKris Buschelman 5094482741eSBarry Smith PetscValidHeaderSpecific(P,MAT_COOKIE,2); 510c9780b6fSBarry Smith PetscValidType(P,2); 511eb9c0419SKris Buschelman MatPreallocated(P); 512eb9c0419SKris Buschelman if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 513eb9c0419SKris Buschelman if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 514eb9c0419SKris Buschelman 5154482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 516c9780b6fSBarry Smith PetscValidType(C,3); 517eb9c0419SKris Buschelman MatPreallocated(C); 518eb9c0419SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 519eb9c0419SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 520eb9c0419SKris Buschelman 521eb9c0419SKris Buschelman if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M); 522eb9c0419SKris Buschelman if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N); 523eb9c0419SKris Buschelman if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N); 524eb9c0419SKris Buschelman if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N); 525eb9c0419SKris Buschelman 526534c1384SKris Buschelman /* For now, we do not dispatch based on the type of A and P */ 527534c1384SKris Buschelman /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */ 528534c1384SKris Buschelman fA = A->ops->ptapnumeric; 529534c1384SKris Buschelman if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name); 530534c1384SKris Buschelman fP = P->ops->ptapnumeric; 531534c1384SKris Buschelman if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name); 532534c1384SKris Buschelman if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name); 5334d3841fdSKris Buschelman 534534c1384SKris Buschelman ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 535534c1384SKris Buschelman ierr = (*fA)(A,P,C);CHKERRQ(ierr); 536534c1384SKris Buschelman ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr); 537eb9c0419SKris Buschelman 538eb9c0419SKris Buschelman PetscFunctionReturn(0); 539eb9c0419SKris Buschelman } 540eb9c0419SKris Buschelman 541eb9c0419SKris Buschelman #undef __FUNCT__ 5429af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ" 543dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) 544dfbe8321SBarry Smith { 545dfbe8321SBarry Smith PetscErrorCode ierr; 546d20bfe6fSHong Zhang int flops=0; 547d20bfe6fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 548d20bfe6fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 549d20bfe6fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 550d20bfe6fSHong Zhang int *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 551d20bfe6fSHong Zhang int *ci=c->i,*cj=c->j,*cjj; 552d20bfe6fSHong Zhang int am=A->M,cn=C->N,cm=C->M; 553d20bfe6fSHong Zhang int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 554d20bfe6fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 555eb9c0419SKris Buschelman 556eb9c0419SKris Buschelman PetscFunctionBegin; 557d20bfe6fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 558d20bfe6fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 559d20bfe6fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 560eb9c0419SKris Buschelman 561d20bfe6fSHong Zhang apj = (int *)(apa + cn); 562d20bfe6fSHong Zhang apjdense = apj + cn; 563d20bfe6fSHong Zhang 564d20bfe6fSHong Zhang /* Clear old values in C */ 565d20bfe6fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 566d20bfe6fSHong Zhang 567d20bfe6fSHong Zhang for (i=0;i<am;i++) { 568d20bfe6fSHong Zhang /* Form sparse row of A*P */ 569d20bfe6fSHong Zhang anzi = ai[i+1] - ai[i]; 570d20bfe6fSHong Zhang apnzj = 0; 571d20bfe6fSHong Zhang for (j=0;j<anzi;j++) { 572d20bfe6fSHong Zhang prow = *aj++; 573d20bfe6fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 574d20bfe6fSHong Zhang pjj = pj + pi[prow]; 575d20bfe6fSHong Zhang paj = pa + pi[prow]; 576d20bfe6fSHong Zhang for (k=0;k<pnzj;k++) { 577d20bfe6fSHong Zhang if (!apjdense[pjj[k]]) { 578d20bfe6fSHong Zhang apjdense[pjj[k]] = -1; 579d20bfe6fSHong Zhang apj[apnzj++] = pjj[k]; 580d20bfe6fSHong Zhang } 581d20bfe6fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 582d20bfe6fSHong Zhang } 583d20bfe6fSHong Zhang flops += 2*pnzj; 584d20bfe6fSHong Zhang aa++; 585d20bfe6fSHong Zhang } 586d20bfe6fSHong Zhang 587d20bfe6fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 588d20bfe6fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 589d20bfe6fSHong Zhang 590d20bfe6fSHong Zhang /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 591d20bfe6fSHong Zhang pnzi = pi[i+1] - pi[i]; 592d20bfe6fSHong Zhang for (j=0;j<pnzi;j++) { 593d20bfe6fSHong Zhang nextap = 0; 594d20bfe6fSHong Zhang crow = *pJ++; 595d20bfe6fSHong Zhang cjj = cj + ci[crow]; 596d20bfe6fSHong Zhang caj = ca + ci[crow]; 597d20bfe6fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 598d20bfe6fSHong Zhang for (k=0;nextap<apnzj;k++) { 599d20bfe6fSHong Zhang if (cjj[k]==apj[nextap]) { 600d20bfe6fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 601d20bfe6fSHong Zhang } 602d20bfe6fSHong Zhang } 603d20bfe6fSHong Zhang flops += 2*apnzj; 604d20bfe6fSHong Zhang pA++; 605d20bfe6fSHong Zhang } 606d20bfe6fSHong Zhang 607d20bfe6fSHong Zhang /* Zero the current row info for A*P */ 608d20bfe6fSHong Zhang for (j=0;j<apnzj;j++) { 609d20bfe6fSHong Zhang apa[apj[j]] = 0.; 610d20bfe6fSHong Zhang apjdense[apj[j]] = 0; 611d20bfe6fSHong Zhang } 612d20bfe6fSHong Zhang } 613d20bfe6fSHong Zhang 614d20bfe6fSHong Zhang /* Assemble the final matrix and clean up */ 615d20bfe6fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 616d20bfe6fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617d20bfe6fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 618d20bfe6fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 619d20bfe6fSHong Zhang 620eb9c0419SKris Buschelman PetscFunctionReturn(0); 621eb9c0419SKris Buschelman } 6220e36024fSHong Zhang 6230e36024fSHong Zhang /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */ 6240e36024fSHong Zhang 6250e36024fSHong Zhang #undef __FUNCT__ 6260e36024fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt" 6270e36024fSHong Zhang PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C) 6280e36024fSHong Zhang { 6290e36024fSHong Zhang PetscErrorCode ierr; 6300e36024fSHong Zhang int flops=0; 6310e36024fSHong Zhang Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 6320e36024fSHong Zhang Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 6330e36024fSHong Zhang Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 63420d4747cSHong Zhang int *ai=a->i,*aj=a->j,*apj,*apjdense; 63520d4747cSHong Zhang int *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj; 6360e36024fSHong Zhang int *ci=c->i,*cj=c->j,*cjj; 6370e36024fSHong Zhang int am=A->M,cn=C->N,cm=C->M; 6380e36024fSHong Zhang int i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 6390e36024fSHong Zhang MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 6400e36024fSHong Zhang 6410e36024fSHong Zhang PetscFunctionBegin; 6420e36024fSHong Zhang pA=p->a+pi[prstart]; 6430e36024fSHong Zhang /* Allocate temporary array for storage of one row of A*P */ 6440e36024fSHong Zhang ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr); 6450e36024fSHong Zhang ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr); 6460e36024fSHong Zhang 6470e36024fSHong Zhang apj = (int *)(apa + cn); 6480e36024fSHong Zhang apjdense = apj + cn; 6490e36024fSHong Zhang 6500e36024fSHong Zhang /* Clear old values in C */ 6510e36024fSHong Zhang ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 6520e36024fSHong Zhang 6530e36024fSHong Zhang for (i=0;i<am;i++) { 6540e36024fSHong Zhang /* Form sparse row of A*P */ 6550e36024fSHong Zhang anzi = ai[i+1] - ai[i]; 6560e36024fSHong Zhang apnzj = 0; 6570e36024fSHong Zhang for (j=0;j<anzi;j++) { 6580e36024fSHong Zhang prow = *aj++; 6590e36024fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 6600e36024fSHong Zhang pjj = pj + pi[prow]; 6610e36024fSHong Zhang paj = pa + pi[prow]; 6620e36024fSHong Zhang for (k=0;k<pnzj;k++) { 6630e36024fSHong Zhang if (!apjdense[pjj[k]]) { 6640e36024fSHong Zhang apjdense[pjj[k]] = -1; 6650e36024fSHong Zhang apj[apnzj++] = pjj[k]; 6660e36024fSHong Zhang } 6670e36024fSHong Zhang apa[pjj[k]] += (*aa)*paj[k]; 6680e36024fSHong Zhang } 6690e36024fSHong Zhang flops += 2*pnzj; 6700e36024fSHong Zhang aa++; 6710e36024fSHong Zhang } 6720e36024fSHong Zhang 6730e36024fSHong Zhang /* Sort the j index array for quick sparse axpy. */ 6740e36024fSHong Zhang ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 6750e36024fSHong Zhang 6760e36024fSHong Zhang /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */ 6770e36024fSHong Zhang pnzi = pi[i+1+prstart] - pi[i+prstart]; 6780e36024fSHong Zhang for (j=0;j<pnzi;j++) { 6790e36024fSHong Zhang nextap = 0; 6800e36024fSHong Zhang crow = *pJ++; 6810e36024fSHong Zhang cjj = cj + ci[crow]; 6820e36024fSHong Zhang caj = ca + ci[crow]; 6830e36024fSHong Zhang /* Perform sparse axpy operation. Note cjj includes apj. */ 6840e36024fSHong Zhang for (k=0;nextap<apnzj;k++) { 6850e36024fSHong Zhang if (cjj[k]==apj[nextap]) { 6860e36024fSHong Zhang caj[k] += (*pA)*apa[apj[nextap++]]; 6870e36024fSHong Zhang } 6880e36024fSHong Zhang } 6890e36024fSHong Zhang flops += 2*apnzj; 6900e36024fSHong Zhang pA++; 6910e36024fSHong Zhang } 6920e36024fSHong Zhang 6930e36024fSHong Zhang /* Zero the current row info for A*P */ 6940e36024fSHong Zhang for (j=0;j<apnzj;j++) { 6950e36024fSHong Zhang apa[apj[j]] = 0.; 6960e36024fSHong Zhang apjdense[apj[j]] = 0; 6970e36024fSHong Zhang } 6980e36024fSHong Zhang } 6990e36024fSHong Zhang 7000e36024fSHong Zhang /* Assemble the final matrix and clean up */ 7010e36024fSHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7020e36024fSHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7030e36024fSHong Zhang ierr = PetscFree(apa);CHKERRQ(ierr); 7040e36024fSHong Zhang ierr = PetscLogFlops(flops);CHKERRQ(ierr); 7050e36024fSHong Zhang 7060e36024fSHong Zhang PetscFunctionReturn(0); 7070e36024fSHong Zhang } 7080e36024fSHong Zhang 7090e36024fSHong Zhang #undef __FUNCT__ 7100e36024fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt" 7110e36024fSHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) { 7120e36024fSHong Zhang PetscErrorCode ierr; 7130e36024fSHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 7140e36024fSHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 7150e36024fSHong Zhang int *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 7160e36024fSHong Zhang int *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj; 7170e36024fSHong Zhang int an=A->N,am=A->M,pn=P->N,pm=P->M; 7180e36024fSHong Zhang int i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 7190e36024fSHong Zhang MatScalar *ca; 7200e36024fSHong Zhang Mat *psub,P_sub; 7210e36024fSHong Zhang IS isrow,iscol; 7220e36024fSHong Zhang int m = prend - prstart; 7230b89d903Svictorle 7240b89d903Svictorle PetscFunctionBegin; 7250b89d903Svictorle /* Get ij structure of P[rstart:rend,:]^T */ 7260e36024fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr); 7270e36024fSHong Zhang ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr); 7280e36024fSHong Zhang ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr); 7290e36024fSHong Zhang ierr = ISDestroy(isrow);CHKERRQ(ierr); 7300e36024fSHong Zhang ierr = ISDestroy(iscol);CHKERRQ(ierr); 7310e36024fSHong Zhang P_sub = psub[0]; 7320e36024fSHong Zhang ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr); 7330e36024fSHong Zhang ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr); 7340e36024fSHong Zhang ptJ=ptj; 7350e36024fSHong Zhang 7360e36024fSHong Zhang /* Allocate ci array, arrays for fill computation and */ 7370e36024fSHong Zhang /* free space for accumulating nonzero column info */ 7380e36024fSHong Zhang ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr); 7390e36024fSHong Zhang ci[0] = 0; 7400e36024fSHong Zhang 7410e36024fSHong Zhang ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr); 7420e36024fSHong Zhang ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr); 7430e36024fSHong Zhang ptasparserow = ptadenserow + an; 7440e36024fSHong Zhang denserow = ptasparserow + an; 7450e36024fSHong Zhang sparserow = denserow + pn; 7460e36024fSHong Zhang 7470e36024fSHong Zhang /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 7480e36024fSHong Zhang /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 7490e36024fSHong Zhang ierr = GetMoreSpace((ai[am]/pm)*pn,&free_space); 7500e36024fSHong Zhang current_space = free_space; 7510e36024fSHong Zhang 7520e36024fSHong Zhang /* Determine symbolic info for each row of C: */ 7530e36024fSHong Zhang for (i=0;i<pn;i++) { 7540e36024fSHong Zhang ptnzi = pti[i+1] - pti[i]; 7550e36024fSHong Zhang ptanzi = 0; 7560e36024fSHong Zhang /* Determine symbolic row of PtA_reduced: */ 7570e36024fSHong Zhang for (j=0;j<ptnzi;j++) { 7580e36024fSHong Zhang arow = *ptJ++; 7590e36024fSHong Zhang anzj = ai[arow+1] - ai[arow]; 7600e36024fSHong Zhang ajj = aj + ai[arow]; 7610e36024fSHong Zhang for (k=0;k<anzj;k++) { 7620e36024fSHong Zhang if (!ptadenserow[ajj[k]]) { 7630e36024fSHong Zhang ptadenserow[ajj[k]] = -1; 7640e36024fSHong Zhang ptasparserow[ptanzi++] = ajj[k]; 7650e36024fSHong Zhang } 7660e36024fSHong Zhang } 7670e36024fSHong Zhang } 7680e36024fSHong Zhang /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 7690e36024fSHong Zhang ptaj = ptasparserow; 7700e36024fSHong Zhang cnzi = 0; 7710e36024fSHong Zhang for (j=0;j<ptanzi;j++) { 7720e36024fSHong Zhang prow = *ptaj++; 7730e36024fSHong Zhang pnzj = pi[prow+1] - pi[prow]; 7740e36024fSHong Zhang pjj = pj + pi[prow]; 7750e36024fSHong Zhang for (k=0;k<pnzj;k++) { 7760e36024fSHong Zhang if (!denserow[pjj[k]]) { 7770e36024fSHong Zhang denserow[pjj[k]] = -1; 7780e36024fSHong Zhang sparserow[cnzi++] = pjj[k]; 7790e36024fSHong Zhang } 7800e36024fSHong Zhang } 7810e36024fSHong Zhang } 7820e36024fSHong Zhang 7830e36024fSHong Zhang /* sort sparserow */ 7840e36024fSHong Zhang ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 7850e36024fSHong Zhang 7860e36024fSHong Zhang /* If free space is not available, make more free space */ 7870e36024fSHong Zhang /* Double the amount of total space in the list */ 7880e36024fSHong Zhang if (current_space->local_remaining<cnzi) { 7890e36024fSHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 7900e36024fSHong Zhang } 7910e36024fSHong Zhang 7920e36024fSHong Zhang /* Copy data into free space, and zero out denserows */ 7930e36024fSHong Zhang ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr); 7940e36024fSHong Zhang current_space->array += cnzi; 7950e36024fSHong Zhang current_space->local_used += cnzi; 7960e36024fSHong Zhang current_space->local_remaining -= cnzi; 7970e36024fSHong Zhang 7980e36024fSHong Zhang for (j=0;j<ptanzi;j++) { 7990e36024fSHong Zhang ptadenserow[ptasparserow[j]] = 0; 8000e36024fSHong Zhang } 8010e36024fSHong Zhang for (j=0;j<cnzi;j++) { 8020e36024fSHong Zhang denserow[sparserow[j]] = 0; 8030e36024fSHong Zhang } 8040e36024fSHong Zhang /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 8050e36024fSHong Zhang /* For now, we will recompute what is needed. */ 8060e36024fSHong Zhang ci[i+1] = ci[i] + cnzi; 8070e36024fSHong Zhang } 8080e36024fSHong Zhang /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 8090e36024fSHong Zhang /* Allocate space for cj, initialize cj, and */ 8100e36024fSHong Zhang /* destroy list of free space and other temporary array(s) */ 8110e36024fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr); 8120e36024fSHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 8130e36024fSHong Zhang ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 8140e36024fSHong Zhang 8150e36024fSHong Zhang /* Allocate space for ca */ 8160e36024fSHong Zhang ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 8170e36024fSHong Zhang ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 8180e36024fSHong Zhang 8190e36024fSHong Zhang /* put together the new matrix */ 8200e36024fSHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr); 8210e36024fSHong Zhang 8220e36024fSHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 8230e36024fSHong Zhang /* Since these are PETSc arrays, change flags to free them as necessary. */ 8240e36024fSHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 8250e36024fSHong Zhang c->freedata = PETSC_TRUE; 8260e36024fSHong Zhang c->nonew = 0; 8270e36024fSHong Zhang 8280e36024fSHong Zhang /* Clean up. */ 8290e36024fSHong Zhang ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 8300e36024fSHong Zhang 8310e36024fSHong Zhang PetscFunctionReturn(0); 8320e36024fSHong Zhang } 8330e36024fSHong Zhang 8340e36024fSHong Zhang #undef __FUNCT__ 8350e36024fSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt" 8360e36024fSHong Zhang PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C) 8370e36024fSHong Zhang { 8380e36024fSHong Zhang PetscErrorCode ierr; 8390e36024fSHong Zhang PetscFunctionBegin; 8400e36024fSHong Zhang if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart); 8410e36024fSHong Zhang if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m); 8420e36024fSHong Zhang if (scall == MAT_INITIAL_MATRIX){ 8430e36024fSHong Zhang ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr); 8440e36024fSHong Zhang } 8450e36024fSHong Zhang 8460e36024fSHong Zhang ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr); 8470e36024fSHong Zhang 8480e36024fSHong Zhang PetscFunctionReturn(0); 8490e36024fSHong Zhang } 8500e36024fSHong Zhang 851