xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision d20bfe6f30cee1ec1490c5d7eb71afa766e1d3aa)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9eb9c0419SKris Buschelman 
10eb9c0419SKris Buschelman #undef __FUNCT__
119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
124d3841fdSKris Buschelman /*@
139af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
144d3841fdSKris Buschelman 
154d3841fdSKris Buschelman    Collective on Mat
164d3841fdSKris Buschelman 
174d3841fdSKris Buschelman    Input Parameters:
184d3841fdSKris Buschelman +  A - the matrix
19f747e1aeSHong Zhang .  P - the projection matrix
20f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
224d3841fdSKris Buschelman 
234d3841fdSKris Buschelman    Output Parameters:
244d3841fdSKris Buschelman .  C - the product matrix
254d3841fdSKris Buschelman 
264d3841fdSKris Buschelman    Notes:
274d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
284d3841fdSKris Buschelman 
294d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
304d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
314d3841fdSKris Buschelman 
324d3841fdSKris Buschelman    Level: intermediate
334d3841fdSKris Buschelman 
349af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
354d3841fdSKris Buschelman @*/
36dfbe8321SBarry Smith PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37dfbe8321SBarry Smith   PetscErrorCode ierr;
38534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40eb9c0419SKris Buschelman 
41eb9c0419SKris Buschelman   PetscFunctionBegin;
429af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
439af31e4aSHong Zhang   PetscValidType(A,1);
449af31e4aSHong Zhang   MatPreallocated(A);
459af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
469af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
479af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
489af31e4aSHong Zhang   PetscValidType(P,2);
499af31e4aSHong Zhang   MatPreallocated(P);
509af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
529af31e4aSHong Zhang   PetscValidPointer(C,3);
53534c1384SKris Buschelman 
549af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55eb9c0419SKris Buschelman 
569af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57eb9c0419SKris Buschelman 
58534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
59534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60534c1384SKris Buschelman   fA = A->ops->ptap;
61534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62534c1384SKris Buschelman   fP = P->ops->ptap;
63534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65534c1384SKris Buschelman 
669af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
689af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69eb9c0419SKris Buschelman   PetscFunctionReturn(0);
70eb9c0419SKris Buschelman }
71eb9c0419SKris Buschelman 
72eb9c0419SKris Buschelman #undef __FUNCT__
73ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
74ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
75ff134f7aSHong Zhang {
76170ef064SHong Zhang #ifdef TMP
77ff134f7aSHong Zhang   PetscErrorCode    ierr;
78ff134f7aSHong Zhang   Mat               C_mpi,AP_seq,P_seq,P_subseq,*psubseq;
79ff134f7aSHong Zhang   Mat_MPIAIJ        *p = (Mat_MPIAIJ*)P->data;
80ff134f7aSHong Zhang   Mat_MatMatMultMPI *mult;
81ff134f7aSHong Zhang   int               i,prow,prstart,prend,m=P->m,pncols;
82ff134f7aSHong Zhang   const int         *pcols;
83ff134f7aSHong Zhang   const PetscScalar *pvals;
8432dcc486SBarry Smith   PetscMPIInt       rank;
85ff134f7aSHong Zhang 
86ff134f7aSHong Zhang   PetscFunctionBegin;
87ff134f7aSHong Zhang   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
88ff134f7aSHong Zhang 
89ff134f7aSHong Zhang   ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,P,fill,&C_mpi);CHKERRQ(ierr);
90ff134f7aSHong Zhang   mult = (Mat_MatMatMultMPI*)C_mpi->spptr;
91ff134f7aSHong Zhang   P_seq   = mult->bseq[0];
92ff134f7aSHong Zhang   AP_seq  = mult->C_seq;
93ff134f7aSHong Zhang   prstart = mult->brstart;
94ff134f7aSHong Zhang   prend   = prstart + m;
95ff134f7aSHong Zhang   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] prstart: %d, prend: %d, dim of P_seq: %d, %d\n",rank,prstart,prend,P_seq->m,P_seq->n);
96ff134f7aSHong Zhang 
97ff134f7aSHong Zhang   /* get seq matrix P_subseq by taking local rows of P */
98ff134f7aSHong Zhang   IS  isrow,iscol;
99ff134f7aSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
100ff134f7aSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P_seq->n,0,1,&iscol);CHKERRQ(ierr);
101ff134f7aSHong Zhang   ierr = MatGetSubMatrices(P_seq,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psubseq);CHKERRQ(ierr);
102ff134f7aSHong Zhang   P_subseq = psubseq[0];
103ff134f7aSHong Zhang   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] dim of P_subseq: %d, %d\n",rank,P_subseq->m,P_subseq->n);
104ff134f7aSHong Zhang 
105ff134f7aSHong Zhang   /* Compute P_subseq^T*C_seq using outer product (P_loc^T)[:,i]*C_seq[i,:]. */
106ff134f7aSHong Zhang   for (i=0;i<m;i++) {
107ff134f7aSHong Zhang     prow = prstart + i;
108ff134f7aSHong Zhang     ierr = MatGetRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr);
109ff134f7aSHong Zhang     ierr = MatRestoreRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr);
110ff134f7aSHong Zhang   }
111ff134f7aSHong Zhang 
112ff134f7aSHong Zhang   *C = C_mpi; /* to be removed! */
113170ef064SHong Zhang #endif /* TMP */
114ff134f7aSHong Zhang   PetscFunctionReturn(0);
115ff134f7aSHong Zhang }
116ff134f7aSHong Zhang 
117ff134f7aSHong Zhang #undef __FUNCT__
118ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
119ff134f7aSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
120ff134f7aSHong Zhang {
121ff134f7aSHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
122ff134f7aSHong Zhang   PetscErrorCode    ierr;
123ff134f7aSHong Zhang 
124ff134f7aSHong Zhang   PetscFunctionBegin;
125ff134f7aSHong Zhang   PetscFunctionReturn(0);
126ff134f7aSHong Zhang }
127ff134f7aSHong Zhang 
128ff134f7aSHong Zhang #undef __FUNCT__
129ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
130ff134f7aSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
131ff134f7aSHong Zhang {
132ff134f7aSHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
133ff134f7aSHong Zhang   PetscErrorCode    ierr;
134ff134f7aSHong Zhang 
135ff134f7aSHong Zhang   PetscFunctionBegin;
136ff134f7aSHong Zhang   PetscFunctionReturn(0);
137ff134f7aSHong Zhang }
138ff134f7aSHong Zhang 
139ff134f7aSHong Zhang #undef __FUNCT__
1409af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
141dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
1429af31e4aSHong Zhang {
143dfbe8321SBarry Smith   PetscErrorCode ierr;
1449af31e4aSHong Zhang   PetscFunctionBegin;
1459af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
146*d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1479af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
148*d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1499af31e4aSHong Zhang   }
150*d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1519af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
152*d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1539af31e4aSHong Zhang   PetscFunctionReturn(0);
1549af31e4aSHong Zhang }
1559af31e4aSHong Zhang 
1569af31e4aSHong Zhang #undef __FUNCT__
1579af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
1586849ba73SBarry Smith /*
1599af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
1604d3841fdSKris Buschelman 
1614d3841fdSKris Buschelman    Collective on Mat
1624d3841fdSKris Buschelman 
1634d3841fdSKris Buschelman    Input Parameters:
1644d3841fdSKris Buschelman +  A - the matrix
1654d3841fdSKris Buschelman -  P - the projection matrix
1664d3841fdSKris Buschelman 
1674d3841fdSKris Buschelman    Output Parameters:
1684d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
1694d3841fdSKris Buschelman 
1704d3841fdSKris Buschelman    Notes:
1714d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
1724d3841fdSKris Buschelman 
1734d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
1744d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
1759af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
1764d3841fdSKris Buschelman 
1774d3841fdSKris Buschelman    Level: intermediate
1784d3841fdSKris Buschelman 
1799af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
1806849ba73SBarry Smith */
181dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
182dfbe8321SBarry Smith   PetscErrorCode ierr;
183534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
184534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
185eb9c0419SKris Buschelman 
186eb9c0419SKris Buschelman   PetscFunctionBegin;
187eb9c0419SKris Buschelman 
1884482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
189c9780b6fSBarry Smith   PetscValidType(A,1);
190eb9c0419SKris Buschelman   MatPreallocated(A);
191eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
192eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
193eb9c0419SKris Buschelman 
1944482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
195c9780b6fSBarry Smith   PetscValidType(P,2);
196eb9c0419SKris Buschelman   MatPreallocated(P);
197eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
198eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
199eb9c0419SKris Buschelman 
2004482741eSBarry Smith   PetscValidPointer(C,3);
2014482741eSBarry Smith 
202eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
203eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
204eb9c0419SKris Buschelman 
205534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
206534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
207534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
208534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
209534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
210534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
211534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2124d3841fdSKris Buschelman 
213534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
214534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
215534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
216eb9c0419SKris Buschelman 
217eb9c0419SKris Buschelman   PetscFunctionReturn(0);
218eb9c0419SKris Buschelman }
219eb9c0419SKris Buschelman 
220f747e1aeSHong Zhang typedef struct {
221f747e1aeSHong Zhang   Mat    symAP;
222f747e1aeSHong Zhang } Mat_PtAPstruct;
223f747e1aeSHong Zhang 
22478a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
22578a80504SBarry Smith 
226f747e1aeSHong Zhang #undef __FUNCT__
227f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
228f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
229f747e1aeSHong Zhang {
230f4a850bbSBarry Smith   PetscErrorCode    ierr;
231f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
232f747e1aeSHong Zhang 
233f747e1aeSHong Zhang   PetscFunctionBegin;
234f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
235f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
23678a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
237f747e1aeSHong Zhang   PetscFunctionReturn(0);
238f747e1aeSHong Zhang }
239f747e1aeSHong Zhang 
240eb9c0419SKris Buschelman #undef __FUNCT__
2419af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
242dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
243dfbe8321SBarry Smith   PetscErrorCode ierr;
244*d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
245*d20bfe6fSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
246*d20bfe6fSHong Zhang   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
247*d20bfe6fSHong Zhang   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
248*d20bfe6fSHong Zhang   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
249*d20bfe6fSHong Zhang   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
250*d20bfe6fSHong Zhang   MatScalar      *ca;
251eb9c0419SKris Buschelman 
252eb9c0419SKris Buschelman   PetscFunctionBegin;
253*d20bfe6fSHong Zhang   /* Get ij structure of P^T */
254eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
255*d20bfe6fSHong Zhang   ptJ=ptj;
256eb9c0419SKris Buschelman 
257*d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
258*d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
259*d20bfe6fSHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
260*d20bfe6fSHong Zhang   ci[0] = 0;
261eb9c0419SKris Buschelman 
262*d20bfe6fSHong Zhang   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
263*d20bfe6fSHong Zhang   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
264*d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
265*d20bfe6fSHong Zhang   denserow     = ptasparserow + an;
266*d20bfe6fSHong Zhang   sparserow    = denserow     + pn;
267eb9c0419SKris Buschelman 
268*d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
269*d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
270*d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
271*d20bfe6fSHong Zhang   current_space = free_space;
272*d20bfe6fSHong Zhang 
273*d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
274*d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
275*d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
276*d20bfe6fSHong Zhang     ptanzi = 0;
277*d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
278*d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
279*d20bfe6fSHong Zhang       arow = *ptJ++;
280*d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
281*d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
282*d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
283*d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
284*d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
285*d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
286*d20bfe6fSHong Zhang         }
287*d20bfe6fSHong Zhang       }
288*d20bfe6fSHong Zhang     }
289*d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
290*d20bfe6fSHong Zhang     ptaj = ptasparserow;
291*d20bfe6fSHong Zhang     cnzi   = 0;
292*d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
293*d20bfe6fSHong Zhang       prow = *ptaj++;
294*d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
295*d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
296*d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
297*d20bfe6fSHong Zhang         if (!denserow[pjj[k]]) {
298*d20bfe6fSHong Zhang             denserow[pjj[k]]  = -1;
299*d20bfe6fSHong Zhang             sparserow[cnzi++] = pjj[k];
300*d20bfe6fSHong Zhang         }
301*d20bfe6fSHong Zhang       }
302*d20bfe6fSHong Zhang     }
303*d20bfe6fSHong Zhang 
304*d20bfe6fSHong Zhang     /* sort sparserow */
305*d20bfe6fSHong Zhang     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
306*d20bfe6fSHong Zhang 
307*d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
308*d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
309*d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
310*d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
311*d20bfe6fSHong Zhang     }
312*d20bfe6fSHong Zhang 
313*d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
314*d20bfe6fSHong Zhang     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
315*d20bfe6fSHong Zhang     current_space->array           += cnzi;
316*d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
317*d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
318*d20bfe6fSHong Zhang 
319*d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
320*d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
321*d20bfe6fSHong Zhang     }
322*d20bfe6fSHong Zhang     for (j=0;j<cnzi;j++) {
323*d20bfe6fSHong Zhang       denserow[sparserow[j]] = 0;
324*d20bfe6fSHong Zhang     }
325*d20bfe6fSHong Zhang       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
326*d20bfe6fSHong Zhang       /*        For now, we will recompute what is needed. */
327*d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
328*d20bfe6fSHong Zhang   }
329*d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
330*d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
331*d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
332*d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
333*d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
334*d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
335*d20bfe6fSHong Zhang 
336*d20bfe6fSHong Zhang   /* Allocate space for ca */
337*d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
338*d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
339*d20bfe6fSHong Zhang 
340*d20bfe6fSHong Zhang   /* put together the new matrix */
341*d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
342*d20bfe6fSHong Zhang 
343*d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
344*d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
345*d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
346*d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
347*d20bfe6fSHong Zhang   c->nonew    = 0;
348*d20bfe6fSHong Zhang 
349*d20bfe6fSHong Zhang   /* Clean up. */
350*d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
351eb9c0419SKris Buschelman 
352eb9c0419SKris Buschelman   PetscFunctionReturn(0);
353eb9c0419SKris Buschelman }
354eb9c0419SKris Buschelman 
3553985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
3563985e5eaSKris Buschelman EXTERN_C_BEGIN
3573985e5eaSKris Buschelman #undef __FUNCT__
3589af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
359dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
3605c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
361dfbe8321SBarry Smith   PetscErrorCode ierr;
3623985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
3633985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
3643985e5eaSKris Buschelman   Mat            P=pp->AIJ;
3653985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3663985e5eaSKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
3673985e5eaSKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
3683985e5eaSKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
369fe05a634SKris Buschelman   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
3703985e5eaSKris Buschelman   MatScalar      *ca;
3713985e5eaSKris Buschelman 
3723985e5eaSKris Buschelman   PetscFunctionBegin;
3733985e5eaSKris Buschelman   /* Start timer */
3749af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3753985e5eaSKris Buschelman 
3763985e5eaSKris Buschelman   /* Get ij structure of P^T */
3773985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3783985e5eaSKris Buschelman 
3793985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
3803985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
3813985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
3823985e5eaSKris Buschelman   ci[0] = 0;
3833985e5eaSKris Buschelman 
3843985e5eaSKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
3853985e5eaSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
3863985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
3873985e5eaSKris Buschelman   denserow     = ptasparserow + an;
3883985e5eaSKris Buschelman   sparserow    = denserow     + pn;
3893985e5eaSKris Buschelman 
3903985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3913985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3923985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
3933985e5eaSKris Buschelman   current_space = free_space;
3943985e5eaSKris Buschelman 
3953985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
3963985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
3973985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
3983985e5eaSKris Buschelman     ptanzi = 0;
3993985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4003985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4013985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4023985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4033985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4043985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4053985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4063985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4073985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4083985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4093985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4103985e5eaSKris Buschelman           }
4113985e5eaSKris Buschelman         }
4123985e5eaSKris Buschelman       }
4133985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4143985e5eaSKris Buschelman       ptaj = ptasparserow;
4153985e5eaSKris Buschelman       cnzi   = 0;
4163985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
417fe05a634SKris Buschelman         pdof = *ptaj%dof;
4183985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4193985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4203985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4213985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
422fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
423fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
424fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4253985e5eaSKris Buschelman           }
4263985e5eaSKris Buschelman         }
4273985e5eaSKris Buschelman       }
4283985e5eaSKris Buschelman 
4293985e5eaSKris Buschelman       /* sort sparserow */
4303985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
4313985e5eaSKris Buschelman 
4323985e5eaSKris Buschelman       /* If free space is not available, make more free space */
4333985e5eaSKris Buschelman       /* Double the amount of total space in the list */
4343985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
4353985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4363985e5eaSKris Buschelman       }
4373985e5eaSKris Buschelman 
4383985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
4393985e5eaSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
4403985e5eaSKris Buschelman       current_space->array           += cnzi;
4413985e5eaSKris Buschelman       current_space->local_used      += cnzi;
4423985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
4433985e5eaSKris Buschelman 
4443985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
4453985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
4463985e5eaSKris Buschelman       }
4473985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
4483985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
4493985e5eaSKris Buschelman       }
4503985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
4513985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
4523985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
4533985e5eaSKris Buschelman     }
4543985e5eaSKris Buschelman   }
4553985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
4563985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
4573985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
4583985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
4593985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
4603985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
4613985e5eaSKris Buschelman 
4623985e5eaSKris Buschelman   /* Allocate space for ca */
4633985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
4643985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
4653985e5eaSKris Buschelman 
4663985e5eaSKris Buschelman   /* put together the new matrix */
4673985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
4683985e5eaSKris Buschelman 
4693985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4703985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
4713985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
4723985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
4733985e5eaSKris Buschelman   c->nonew    = 0;
4743985e5eaSKris Buschelman 
4753985e5eaSKris Buschelman   /* Clean up. */
4763985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4773985e5eaSKris Buschelman 
4789af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4793985e5eaSKris Buschelman   PetscFunctionReturn(0);
4803985e5eaSKris Buschelman }
4813985e5eaSKris Buschelman EXTERN_C_END
4823985e5eaSKris Buschelman 
483eb9c0419SKris Buschelman #undef __FUNCT__
4849af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
4856849ba73SBarry Smith /*
4869af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
4874d3841fdSKris Buschelman 
4884d3841fdSKris Buschelman    Collective on Mat
4894d3841fdSKris Buschelman 
4904d3841fdSKris Buschelman    Input Parameters:
4914d3841fdSKris Buschelman +  A - the matrix
4924d3841fdSKris Buschelman -  P - the projection matrix
4934d3841fdSKris Buschelman 
4944d3841fdSKris Buschelman    Output Parameters:
4954d3841fdSKris Buschelman .  C - the product matrix
4964d3841fdSKris Buschelman 
4974d3841fdSKris Buschelman    Notes:
4989af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
4994d3841fdSKris Buschelman    the user using MatDeatroy().
5004d3841fdSKris Buschelman 
501170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
502170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5034d3841fdSKris Buschelman 
5044d3841fdSKris Buschelman    Level: intermediate
5054d3841fdSKris Buschelman 
5069af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5076849ba73SBarry Smith */
508dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
509dfbe8321SBarry Smith   PetscErrorCode ierr;
510534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
511534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
512eb9c0419SKris Buschelman 
513eb9c0419SKris Buschelman   PetscFunctionBegin;
514eb9c0419SKris Buschelman 
5154482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
516c9780b6fSBarry Smith   PetscValidType(A,1);
517eb9c0419SKris Buschelman   MatPreallocated(A);
518eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
520eb9c0419SKris Buschelman 
5214482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
522c9780b6fSBarry Smith   PetscValidType(P,2);
523eb9c0419SKris Buschelman   MatPreallocated(P);
524eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
525eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
526eb9c0419SKris Buschelman 
5274482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
528c9780b6fSBarry Smith   PetscValidType(C,3);
529eb9c0419SKris Buschelman   MatPreallocated(C);
530eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
531eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
532eb9c0419SKris Buschelman 
533eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
534eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
535eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
536eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
537eb9c0419SKris Buschelman 
538534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
539534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
540534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
541534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
542534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
543534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
544534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5454d3841fdSKris Buschelman 
546534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
547534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
548534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
549eb9c0419SKris Buschelman 
550eb9c0419SKris Buschelman   PetscFunctionReturn(0);
551eb9c0419SKris Buschelman }
552eb9c0419SKris Buschelman 
553eb9c0419SKris Buschelman #undef __FUNCT__
5549af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
555dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
556dfbe8321SBarry Smith {
557dfbe8321SBarry Smith   PetscErrorCode ierr;
558*d20bfe6fSHong Zhang   int            flops=0;
559*d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
560*d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
561*d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
562*d20bfe6fSHong Zhang   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
563*d20bfe6fSHong Zhang   int            *ci=c->i,*cj=c->j,*cjj;
564*d20bfe6fSHong Zhang   int            am=A->M,cn=C->N,cm=C->M;
565*d20bfe6fSHong Zhang   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
566*d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
567eb9c0419SKris Buschelman 
568eb9c0419SKris Buschelman   PetscFunctionBegin;
569*d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
570*d20bfe6fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
571*d20bfe6fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
572eb9c0419SKris Buschelman 
573*d20bfe6fSHong Zhang   apj      = (int *)(apa + cn);
574*d20bfe6fSHong Zhang   apjdense = apj + cn;
575*d20bfe6fSHong Zhang 
576*d20bfe6fSHong Zhang   /* Clear old values in C */
577*d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
578*d20bfe6fSHong Zhang 
579*d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
580*d20bfe6fSHong Zhang     /* Form sparse row of A*P */
581*d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
582*d20bfe6fSHong Zhang     apnzj = 0;
583*d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
584*d20bfe6fSHong Zhang       prow = *aj++;
585*d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
586*d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
587*d20bfe6fSHong Zhang       paj  = pa + pi[prow];
588*d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
589*d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
590*d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
591*d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
592*d20bfe6fSHong Zhang         }
593*d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
594*d20bfe6fSHong Zhang       }
595*d20bfe6fSHong Zhang       flops += 2*pnzj;
596*d20bfe6fSHong Zhang       aa++;
597*d20bfe6fSHong Zhang     }
598*d20bfe6fSHong Zhang 
599*d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
600*d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
601*d20bfe6fSHong Zhang 
602*d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
603*d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
604*d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
605*d20bfe6fSHong Zhang       nextap = 0;
606*d20bfe6fSHong Zhang       crow   = *pJ++;
607*d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
608*d20bfe6fSHong Zhang       caj    = ca + ci[crow];
609*d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
610*d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
611*d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
612*d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
613*d20bfe6fSHong Zhang         }
614*d20bfe6fSHong Zhang       }
615*d20bfe6fSHong Zhang       flops += 2*apnzj;
616*d20bfe6fSHong Zhang       pA++;
617*d20bfe6fSHong Zhang     }
618*d20bfe6fSHong Zhang 
619*d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
620*d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
621*d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
622*d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
623*d20bfe6fSHong Zhang     }
624*d20bfe6fSHong Zhang   }
625*d20bfe6fSHong Zhang 
626*d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
627*d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
628*d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
629*d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
630*d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
631*d20bfe6fSHong Zhang 
632eb9c0419SKris Buschelman   PetscFunctionReturn(0);
633eb9c0419SKris Buschelman }
634