xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 25616d81f464dead70372e35c2cc7e6db2a8f99c)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9eb9c0419SKris Buschelman 
10eb9c0419SKris Buschelman #undef __FUNCT__
119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
124d3841fdSKris Buschelman /*@
139af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
144d3841fdSKris Buschelman 
154d3841fdSKris Buschelman    Collective on Mat
164d3841fdSKris Buschelman 
174d3841fdSKris Buschelman    Input Parameters:
184d3841fdSKris Buschelman +  A - the matrix
19f747e1aeSHong Zhang .  P - the projection matrix
20f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
224d3841fdSKris Buschelman 
234d3841fdSKris Buschelman    Output Parameters:
244d3841fdSKris Buschelman .  C - the product matrix
254d3841fdSKris Buschelman 
264d3841fdSKris Buschelman    Notes:
274d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
284d3841fdSKris Buschelman 
294d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
304d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
314d3841fdSKris Buschelman 
324d3841fdSKris Buschelman    Level: intermediate
334d3841fdSKris Buschelman 
349af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
354d3841fdSKris Buschelman @*/
36dfbe8321SBarry Smith PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37dfbe8321SBarry Smith   PetscErrorCode ierr;
38534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40eb9c0419SKris Buschelman 
41eb9c0419SKris Buschelman   PetscFunctionBegin;
429af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
439af31e4aSHong Zhang   PetscValidType(A,1);
449af31e4aSHong Zhang   MatPreallocated(A);
459af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
469af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
479af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
489af31e4aSHong Zhang   PetscValidType(P,2);
499af31e4aSHong Zhang   MatPreallocated(P);
509af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
529af31e4aSHong Zhang   PetscValidPointer(C,3);
53534c1384SKris Buschelman 
549af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55eb9c0419SKris Buschelman 
569af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57eb9c0419SKris Buschelman 
58534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
59534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60534c1384SKris Buschelman   fA = A->ops->ptap;
61534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62534c1384SKris Buschelman   fP = P->ops->ptap;
63534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65534c1384SKris Buschelman 
669af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
689af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69eb9c0419SKris Buschelman   PetscFunctionReturn(0);
70eb9c0419SKris Buschelman }
71eb9c0419SKris Buschelman 
72*25616d81SHong Zhang EXTERN PetscErrorCode MatGetLocalMat(Mat,MatReuse,IS*,IS*,Mat*);
73*25616d81SHong Zhang EXTERN PetscErrorCode MatGetBrowsOfAcols(Mat,Mat,MatReuse,IS*,IS*,PetscInt*,Mat*);
740e36024fSHong Zhang EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
750e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
760e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
77eb9c0419SKris Buschelman #undef __FUNCT__
78ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
79ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
80ff134f7aSHong Zhang {
81ff134f7aSHong Zhang   PetscErrorCode    ierr;
82*25616d81SHong Zhang   Mat               P_seq,A_loc,C_seq;
830e36024fSHong Zhang   int               prstart,prend,m=P->m;
84*25616d81SHong Zhang   IS                isrowp,iscolp;
85ff134f7aSHong Zhang 
86ff134f7aSHong Zhang   PetscFunctionBegin;
87*25616d81SHong Zhang   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
88*25616d81SHong Zhang   ierr = MatGetBrowsOfAcols(A,P,scall,&isrowp,&iscolp,&prstart,&P_seq);CHKERRQ(ierr);
89*25616d81SHong Zhang   ierr = ISDestroy(iscolp);CHKERRQ(ierr);
90ff134f7aSHong Zhang 
91*25616d81SHong Zhang   /* get A_loc = submatrix of A by taking all local rows of A */
92*25616d81SHong Zhang   ierr = MatGetLocalMat(A,scall,PETSC_NULL,&isrowp,&A_loc);CHKERRQ(ierr);
93*25616d81SHong Zhang   ierr = ISDestroy(isrowp);CHKERRQ(ierr);
940e36024fSHong Zhang 
95*25616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
96ff134f7aSHong Zhang   prend   = prstart + m;
9720d4747cSHong Zhang   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
980e36024fSHong Zhang 
99*25616d81SHong Zhang   /* add C_seq into mpi C */
1000e36024fSHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr);
1010e36024fSHong Zhang 
1020e36024fSHong Zhang   /* clean up */
1030e36024fSHong Zhang   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
104*25616d81SHong Zhang   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
105*25616d81SHong Zhang   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
106ff134f7aSHong Zhang   PetscFunctionReturn(0);
107ff134f7aSHong Zhang }
108ff134f7aSHong Zhang 
109ff134f7aSHong Zhang #undef __FUNCT__
110ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
111ff134f7aSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
112ff134f7aSHong Zhang {
113ff134f7aSHong Zhang 
114ff134f7aSHong Zhang   PetscFunctionBegin;
115ff134f7aSHong Zhang   PetscFunctionReturn(0);
116ff134f7aSHong Zhang }
117ff134f7aSHong Zhang 
118ff134f7aSHong Zhang #undef __FUNCT__
119ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
120ff134f7aSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
121ff134f7aSHong Zhang {
122ff134f7aSHong Zhang 
123ff134f7aSHong Zhang   PetscFunctionBegin;
124ff134f7aSHong Zhang   PetscFunctionReturn(0);
125ff134f7aSHong Zhang }
126ff134f7aSHong Zhang 
127ff134f7aSHong Zhang #undef __FUNCT__
1289af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
129dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
1309af31e4aSHong Zhang {
131dfbe8321SBarry Smith   PetscErrorCode ierr;
1329af31e4aSHong Zhang   PetscFunctionBegin;
1339af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
134d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1359af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
136d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1379af31e4aSHong Zhang   }
138d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1399af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
140d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1419af31e4aSHong Zhang   PetscFunctionReturn(0);
1429af31e4aSHong Zhang }
1439af31e4aSHong Zhang 
1449af31e4aSHong Zhang #undef __FUNCT__
1459af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
1466849ba73SBarry Smith /*
1479af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
1484d3841fdSKris Buschelman 
1494d3841fdSKris Buschelman    Collective on Mat
1504d3841fdSKris Buschelman 
1514d3841fdSKris Buschelman    Input Parameters:
1524d3841fdSKris Buschelman +  A - the matrix
1534d3841fdSKris Buschelman -  P - the projection matrix
1544d3841fdSKris Buschelman 
1554d3841fdSKris Buschelman    Output Parameters:
1564d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
1574d3841fdSKris Buschelman 
1584d3841fdSKris Buschelman    Notes:
1594d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
1604d3841fdSKris Buschelman 
1614d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
1624d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
1639af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
1644d3841fdSKris Buschelman 
1654d3841fdSKris Buschelman    Level: intermediate
1664d3841fdSKris Buschelman 
1679af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
1686849ba73SBarry Smith */
169dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
170dfbe8321SBarry Smith   PetscErrorCode ierr;
171534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
172534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
173eb9c0419SKris Buschelman 
174eb9c0419SKris Buschelman   PetscFunctionBegin;
175eb9c0419SKris Buschelman 
1764482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
177c9780b6fSBarry Smith   PetscValidType(A,1);
178eb9c0419SKris Buschelman   MatPreallocated(A);
179eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
180eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
181eb9c0419SKris Buschelman 
1824482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
183c9780b6fSBarry Smith   PetscValidType(P,2);
184eb9c0419SKris Buschelman   MatPreallocated(P);
185eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
186eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
187eb9c0419SKris Buschelman 
1884482741eSBarry Smith   PetscValidPointer(C,3);
1894482741eSBarry Smith 
190eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
191eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
192eb9c0419SKris Buschelman 
193534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
194534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
195534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
196534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
197534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
198534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
199534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2004d3841fdSKris Buschelman 
201534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
202534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
203534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
204eb9c0419SKris Buschelman 
205eb9c0419SKris Buschelman   PetscFunctionReturn(0);
206eb9c0419SKris Buschelman }
207eb9c0419SKris Buschelman 
208f747e1aeSHong Zhang typedef struct {
209f747e1aeSHong Zhang   Mat    symAP;
210f747e1aeSHong Zhang } Mat_PtAPstruct;
211f747e1aeSHong Zhang 
21278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
21378a80504SBarry Smith 
214f747e1aeSHong Zhang #undef __FUNCT__
215f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
216f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
217f747e1aeSHong Zhang {
218f4a850bbSBarry Smith   PetscErrorCode    ierr;
219f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
220f747e1aeSHong Zhang 
221f747e1aeSHong Zhang   PetscFunctionBegin;
222f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
223f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
22478a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
225f747e1aeSHong Zhang   PetscFunctionReturn(0);
226f747e1aeSHong Zhang }
227f747e1aeSHong Zhang 
228eb9c0419SKris Buschelman #undef __FUNCT__
2299af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
230dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
231dfbe8321SBarry Smith   PetscErrorCode ierr;
232d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
233d20bfe6fSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
234d20bfe6fSHong Zhang   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
235d20bfe6fSHong Zhang   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
236d20bfe6fSHong Zhang   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
237d20bfe6fSHong Zhang   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
238d20bfe6fSHong Zhang   MatScalar      *ca;
239eb9c0419SKris Buschelman 
240eb9c0419SKris Buschelman   PetscFunctionBegin;
241d20bfe6fSHong Zhang   /* Get ij structure of P^T */
242eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
243d20bfe6fSHong Zhang   ptJ=ptj;
244eb9c0419SKris Buschelman 
245d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
246d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
247d20bfe6fSHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
248d20bfe6fSHong Zhang   ci[0] = 0;
249eb9c0419SKris Buschelman 
250d20bfe6fSHong Zhang   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
251d20bfe6fSHong Zhang   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
252d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
253d20bfe6fSHong Zhang   denserow     = ptasparserow + an;
254d20bfe6fSHong Zhang   sparserow    = denserow     + pn;
255eb9c0419SKris Buschelman 
256d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
257d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
258d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
259d20bfe6fSHong Zhang   current_space = free_space;
260d20bfe6fSHong Zhang 
261d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
262d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
263d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
264d20bfe6fSHong Zhang     ptanzi = 0;
265d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
266d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
267d20bfe6fSHong Zhang       arow = *ptJ++;
268d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
269d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
270d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
271d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
272d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
273d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
274d20bfe6fSHong Zhang         }
275d20bfe6fSHong Zhang       }
276d20bfe6fSHong Zhang     }
277d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
278d20bfe6fSHong Zhang     ptaj = ptasparserow;
279d20bfe6fSHong Zhang     cnzi   = 0;
280d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
281d20bfe6fSHong Zhang       prow = *ptaj++;
282d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
283d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
284d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
285d20bfe6fSHong Zhang         if (!denserow[pjj[k]]) {
286d20bfe6fSHong Zhang             denserow[pjj[k]]  = -1;
287d20bfe6fSHong Zhang             sparserow[cnzi++] = pjj[k];
288d20bfe6fSHong Zhang         }
289d20bfe6fSHong Zhang       }
290d20bfe6fSHong Zhang     }
291d20bfe6fSHong Zhang 
292d20bfe6fSHong Zhang     /* sort sparserow */
293d20bfe6fSHong Zhang     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
294d20bfe6fSHong Zhang 
295d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
296d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
297d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
298d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
299d20bfe6fSHong Zhang     }
300d20bfe6fSHong Zhang 
301d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
302d20bfe6fSHong Zhang     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
303d20bfe6fSHong Zhang     current_space->array           += cnzi;
304d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
305d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
306d20bfe6fSHong Zhang 
307d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
308d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
309d20bfe6fSHong Zhang     }
310d20bfe6fSHong Zhang     for (j=0;j<cnzi;j++) {
311d20bfe6fSHong Zhang       denserow[sparserow[j]] = 0;
312d20bfe6fSHong Zhang     }
313d20bfe6fSHong Zhang       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
314d20bfe6fSHong Zhang       /*        For now, we will recompute what is needed. */
315d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
316d20bfe6fSHong Zhang   }
317d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
318d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
319d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
320d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
321d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
322d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
323d20bfe6fSHong Zhang 
324d20bfe6fSHong Zhang   /* Allocate space for ca */
325d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
326d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
327d20bfe6fSHong Zhang 
328d20bfe6fSHong Zhang   /* put together the new matrix */
329d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
330d20bfe6fSHong Zhang 
331d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
332d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
333d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
334d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
335d20bfe6fSHong Zhang   c->nonew    = 0;
336d20bfe6fSHong Zhang 
337d20bfe6fSHong Zhang   /* Clean up. */
338d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
339eb9c0419SKris Buschelman 
340eb9c0419SKris Buschelman   PetscFunctionReturn(0);
341eb9c0419SKris Buschelman }
342eb9c0419SKris Buschelman 
3433985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
3443985e5eaSKris Buschelman EXTERN_C_BEGIN
3453985e5eaSKris Buschelman #undef __FUNCT__
3469af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
347dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
3485c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
349dfbe8321SBarry Smith   PetscErrorCode ierr;
3503985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
3513985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
3523985e5eaSKris Buschelman   Mat            P=pp->AIJ;
3533985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3543985e5eaSKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
3553985e5eaSKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
3563985e5eaSKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
357fe05a634SKris Buschelman   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
3583985e5eaSKris Buschelman   MatScalar      *ca;
3593985e5eaSKris Buschelman 
3603985e5eaSKris Buschelman   PetscFunctionBegin;
3613985e5eaSKris Buschelman   /* Start timer */
3629af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3633985e5eaSKris Buschelman 
3643985e5eaSKris Buschelman   /* Get ij structure of P^T */
3653985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3663985e5eaSKris Buschelman 
3673985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
3683985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
3693985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
3703985e5eaSKris Buschelman   ci[0] = 0;
3713985e5eaSKris Buschelman 
3723985e5eaSKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
3733985e5eaSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
3743985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
3753985e5eaSKris Buschelman   denserow     = ptasparserow + an;
3763985e5eaSKris Buschelman   sparserow    = denserow     + pn;
3773985e5eaSKris Buschelman 
3783985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3793985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3803985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
3813985e5eaSKris Buschelman   current_space = free_space;
3823985e5eaSKris Buschelman 
3833985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
3843985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
3853985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
3863985e5eaSKris Buschelman     ptanzi = 0;
3873985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
3883985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
3893985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
3903985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
3913985e5eaSKris Buschelman         arow = ptJ[j] + dof;
3923985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
3933985e5eaSKris Buschelman         ajj  = aj + ai[arow];
3943985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
3953985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
3963985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
3973985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
3983985e5eaSKris Buschelman           }
3993985e5eaSKris Buschelman         }
4003985e5eaSKris Buschelman       }
4013985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4023985e5eaSKris Buschelman       ptaj = ptasparserow;
4033985e5eaSKris Buschelman       cnzi   = 0;
4043985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
405fe05a634SKris Buschelman         pdof = *ptaj%dof;
4063985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4073985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4083985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4093985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
410fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
411fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
412fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4133985e5eaSKris Buschelman           }
4143985e5eaSKris Buschelman         }
4153985e5eaSKris Buschelman       }
4163985e5eaSKris Buschelman 
4173985e5eaSKris Buschelman       /* sort sparserow */
4183985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
4193985e5eaSKris Buschelman 
4203985e5eaSKris Buschelman       /* If free space is not available, make more free space */
4213985e5eaSKris Buschelman       /* Double the amount of total space in the list */
4223985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
4233985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4243985e5eaSKris Buschelman       }
4253985e5eaSKris Buschelman 
4263985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
4273985e5eaSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
4283985e5eaSKris Buschelman       current_space->array           += cnzi;
4293985e5eaSKris Buschelman       current_space->local_used      += cnzi;
4303985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
4313985e5eaSKris Buschelman 
4323985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
4333985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
4343985e5eaSKris Buschelman       }
4353985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
4363985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
4373985e5eaSKris Buschelman       }
4383985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
4393985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
4403985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
4413985e5eaSKris Buschelman     }
4423985e5eaSKris Buschelman   }
4433985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
4443985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
4453985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
4463985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
4473985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
4483985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
4493985e5eaSKris Buschelman 
4503985e5eaSKris Buschelman   /* Allocate space for ca */
4513985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
4523985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
4533985e5eaSKris Buschelman 
4543985e5eaSKris Buschelman   /* put together the new matrix */
4553985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
4563985e5eaSKris Buschelman 
4573985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4583985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
4593985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
4603985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
4613985e5eaSKris Buschelman   c->nonew    = 0;
4623985e5eaSKris Buschelman 
4633985e5eaSKris Buschelman   /* Clean up. */
4643985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4653985e5eaSKris Buschelman 
4669af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4673985e5eaSKris Buschelman   PetscFunctionReturn(0);
4683985e5eaSKris Buschelman }
4693985e5eaSKris Buschelman EXTERN_C_END
4703985e5eaSKris Buschelman 
471eb9c0419SKris Buschelman #undef __FUNCT__
4729af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
4736849ba73SBarry Smith /*
4749af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
4754d3841fdSKris Buschelman 
4764d3841fdSKris Buschelman    Collective on Mat
4774d3841fdSKris Buschelman 
4784d3841fdSKris Buschelman    Input Parameters:
4794d3841fdSKris Buschelman +  A - the matrix
4804d3841fdSKris Buschelman -  P - the projection matrix
4814d3841fdSKris Buschelman 
4824d3841fdSKris Buschelman    Output Parameters:
4834d3841fdSKris Buschelman .  C - the product matrix
4844d3841fdSKris Buschelman 
4854d3841fdSKris Buschelman    Notes:
4869af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
4874d3841fdSKris Buschelman    the user using MatDeatroy().
4884d3841fdSKris Buschelman 
489170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
490170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
4914d3841fdSKris Buschelman 
4924d3841fdSKris Buschelman    Level: intermediate
4934d3841fdSKris Buschelman 
4949af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
4956849ba73SBarry Smith */
496dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
497dfbe8321SBarry Smith   PetscErrorCode ierr;
498534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
499534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
500eb9c0419SKris Buschelman 
501eb9c0419SKris Buschelman   PetscFunctionBegin;
502eb9c0419SKris Buschelman 
5034482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
504c9780b6fSBarry Smith   PetscValidType(A,1);
505eb9c0419SKris Buschelman   MatPreallocated(A);
506eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
507eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
508eb9c0419SKris Buschelman 
5094482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
510c9780b6fSBarry Smith   PetscValidType(P,2);
511eb9c0419SKris Buschelman   MatPreallocated(P);
512eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
513eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
514eb9c0419SKris Buschelman 
5154482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
516c9780b6fSBarry Smith   PetscValidType(C,3);
517eb9c0419SKris Buschelman   MatPreallocated(C);
518eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
520eb9c0419SKris Buschelman 
521eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
522eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
523eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
524eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
525eb9c0419SKris Buschelman 
526534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
527534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
528534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
529534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
530534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
531534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
532534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5334d3841fdSKris Buschelman 
534534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
535534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
536534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
537eb9c0419SKris Buschelman 
538eb9c0419SKris Buschelman   PetscFunctionReturn(0);
539eb9c0419SKris Buschelman }
540eb9c0419SKris Buschelman 
541eb9c0419SKris Buschelman #undef __FUNCT__
5429af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
543dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
544dfbe8321SBarry Smith {
545dfbe8321SBarry Smith   PetscErrorCode ierr;
546d20bfe6fSHong Zhang   int            flops=0;
547d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
548d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
549d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
550d20bfe6fSHong Zhang   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
551d20bfe6fSHong Zhang   int            *ci=c->i,*cj=c->j,*cjj;
552d20bfe6fSHong Zhang   int            am=A->M,cn=C->N,cm=C->M;
553d20bfe6fSHong Zhang   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
554d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
555eb9c0419SKris Buschelman 
556eb9c0419SKris Buschelman   PetscFunctionBegin;
557d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
558d20bfe6fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
559d20bfe6fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
560eb9c0419SKris Buschelman 
561d20bfe6fSHong Zhang   apj      = (int *)(apa + cn);
562d20bfe6fSHong Zhang   apjdense = apj + cn;
563d20bfe6fSHong Zhang 
564d20bfe6fSHong Zhang   /* Clear old values in C */
565d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
566d20bfe6fSHong Zhang 
567d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
568d20bfe6fSHong Zhang     /* Form sparse row of A*P */
569d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
570d20bfe6fSHong Zhang     apnzj = 0;
571d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
572d20bfe6fSHong Zhang       prow = *aj++;
573d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
574d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
575d20bfe6fSHong Zhang       paj  = pa + pi[prow];
576d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
577d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
578d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
579d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
580d20bfe6fSHong Zhang         }
581d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
582d20bfe6fSHong Zhang       }
583d20bfe6fSHong Zhang       flops += 2*pnzj;
584d20bfe6fSHong Zhang       aa++;
585d20bfe6fSHong Zhang     }
586d20bfe6fSHong Zhang 
587d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
588d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
589d20bfe6fSHong Zhang 
590d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
591d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
592d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
593d20bfe6fSHong Zhang       nextap = 0;
594d20bfe6fSHong Zhang       crow   = *pJ++;
595d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
596d20bfe6fSHong Zhang       caj    = ca + ci[crow];
597d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
598d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
599d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
600d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
601d20bfe6fSHong Zhang         }
602d20bfe6fSHong Zhang       }
603d20bfe6fSHong Zhang       flops += 2*apnzj;
604d20bfe6fSHong Zhang       pA++;
605d20bfe6fSHong Zhang     }
606d20bfe6fSHong Zhang 
607d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
608d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
609d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
610d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
611d20bfe6fSHong Zhang     }
612d20bfe6fSHong Zhang   }
613d20bfe6fSHong Zhang 
614d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
615d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
618d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
619d20bfe6fSHong Zhang 
620eb9c0419SKris Buschelman   PetscFunctionReturn(0);
621eb9c0419SKris Buschelman }
6220e36024fSHong Zhang 
6230e36024fSHong Zhang /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
6240e36024fSHong Zhang 
6250e36024fSHong Zhang #undef __FUNCT__
6260e36024fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
6270e36024fSHong Zhang PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
6280e36024fSHong Zhang {
6290e36024fSHong Zhang   PetscErrorCode ierr;
6300e36024fSHong Zhang   int            flops=0;
6310e36024fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
6320e36024fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
6330e36024fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
63420d4747cSHong Zhang   int            *ai=a->i,*aj=a->j,*apj,*apjdense;
63520d4747cSHong Zhang   int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
6360e36024fSHong Zhang   int            *ci=c->i,*cj=c->j,*cjj;
6370e36024fSHong Zhang   int            am=A->M,cn=C->N,cm=C->M;
6380e36024fSHong Zhang   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
6390e36024fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
6400e36024fSHong Zhang 
6410e36024fSHong Zhang   PetscFunctionBegin;
6420e36024fSHong Zhang   pA=p->a+pi[prstart];
6430e36024fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
6440e36024fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
6450e36024fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
6460e36024fSHong Zhang 
6470e36024fSHong Zhang   apj      = (int *)(apa + cn);
6480e36024fSHong Zhang   apjdense = apj + cn;
6490e36024fSHong Zhang 
6500e36024fSHong Zhang   /* Clear old values in C */
6510e36024fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
6520e36024fSHong Zhang 
6530e36024fSHong Zhang   for (i=0;i<am;i++) {
6540e36024fSHong Zhang     /* Form sparse row of A*P */
6550e36024fSHong Zhang     anzi  = ai[i+1] - ai[i];
6560e36024fSHong Zhang     apnzj = 0;
6570e36024fSHong Zhang     for (j=0;j<anzi;j++) {
6580e36024fSHong Zhang       prow = *aj++;
6590e36024fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
6600e36024fSHong Zhang       pjj  = pj + pi[prow];
6610e36024fSHong Zhang       paj  = pa + pi[prow];
6620e36024fSHong Zhang       for (k=0;k<pnzj;k++) {
6630e36024fSHong Zhang         if (!apjdense[pjj[k]]) {
6640e36024fSHong Zhang           apjdense[pjj[k]] = -1;
6650e36024fSHong Zhang           apj[apnzj++]     = pjj[k];
6660e36024fSHong Zhang         }
6670e36024fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
6680e36024fSHong Zhang       }
6690e36024fSHong Zhang       flops += 2*pnzj;
6700e36024fSHong Zhang       aa++;
6710e36024fSHong Zhang     }
6720e36024fSHong Zhang 
6730e36024fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
6740e36024fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
6750e36024fSHong Zhang 
6760e36024fSHong Zhang     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
6770e36024fSHong Zhang     pnzi = pi[i+1+prstart] - pi[i+prstart];
6780e36024fSHong Zhang     for (j=0;j<pnzi;j++) {
6790e36024fSHong Zhang       nextap = 0;
6800e36024fSHong Zhang       crow   = *pJ++;
6810e36024fSHong Zhang       cjj    = cj + ci[crow];
6820e36024fSHong Zhang       caj    = ca + ci[crow];
6830e36024fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
6840e36024fSHong Zhang       for (k=0;nextap<apnzj;k++) {
6850e36024fSHong Zhang         if (cjj[k]==apj[nextap]) {
6860e36024fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
6870e36024fSHong Zhang         }
6880e36024fSHong Zhang       }
6890e36024fSHong Zhang       flops += 2*apnzj;
6900e36024fSHong Zhang       pA++;
6910e36024fSHong Zhang     }
6920e36024fSHong Zhang 
6930e36024fSHong Zhang     /* Zero the current row info for A*P */
6940e36024fSHong Zhang     for (j=0;j<apnzj;j++) {
6950e36024fSHong Zhang       apa[apj[j]]      = 0.;
6960e36024fSHong Zhang       apjdense[apj[j]] = 0;
6970e36024fSHong Zhang     }
6980e36024fSHong Zhang   }
6990e36024fSHong Zhang 
7000e36024fSHong Zhang   /* Assemble the final matrix and clean up */
7010e36024fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7020e36024fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7030e36024fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
7040e36024fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
7050e36024fSHong Zhang 
7060e36024fSHong Zhang   PetscFunctionReturn(0);
7070e36024fSHong Zhang }
7080e36024fSHong Zhang 
7090e36024fSHong Zhang #undef __FUNCT__
7100e36024fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
7110e36024fSHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
7120e36024fSHong Zhang   PetscErrorCode ierr;
7130e36024fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
7140e36024fSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
7150e36024fSHong Zhang   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
7160e36024fSHong Zhang   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
7170e36024fSHong Zhang   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
7180e36024fSHong Zhang   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
7190e36024fSHong Zhang   MatScalar      *ca;
7200e36024fSHong Zhang   Mat *psub,P_sub;
7210e36024fSHong Zhang   IS  isrow,iscol;
7220e36024fSHong Zhang   int m = prend - prstart;
7230b89d903Svictorle 
7240b89d903Svictorle   PetscFunctionBegin;
7250b89d903Svictorle   /* Get ij structure of P[rstart:rend,:]^T */
7260e36024fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
7270e36024fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
7280e36024fSHong Zhang   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
7290e36024fSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
7300e36024fSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
7310e36024fSHong Zhang   P_sub = psub[0];
7320e36024fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
7330e36024fSHong Zhang   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
7340e36024fSHong Zhang   ptJ=ptj;
7350e36024fSHong Zhang 
7360e36024fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
7370e36024fSHong Zhang   /* free space for accumulating nonzero column info */
7380e36024fSHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
7390e36024fSHong Zhang   ci[0] = 0;
7400e36024fSHong Zhang 
7410e36024fSHong Zhang   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
7420e36024fSHong Zhang   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
7430e36024fSHong Zhang   ptasparserow = ptadenserow  + an;
7440e36024fSHong Zhang   denserow     = ptasparserow + an;
7450e36024fSHong Zhang   sparserow    = denserow     + pn;
7460e36024fSHong Zhang 
7470e36024fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
7480e36024fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
7490e36024fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
7500e36024fSHong Zhang   current_space = free_space;
7510e36024fSHong Zhang 
7520e36024fSHong Zhang   /* Determine symbolic info for each row of C: */
7530e36024fSHong Zhang   for (i=0;i<pn;i++) {
7540e36024fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
7550e36024fSHong Zhang     ptanzi = 0;
7560e36024fSHong Zhang     /* Determine symbolic row of PtA_reduced: */
7570e36024fSHong Zhang     for (j=0;j<ptnzi;j++) {
7580e36024fSHong Zhang       arow = *ptJ++;
7590e36024fSHong Zhang       anzj = ai[arow+1] - ai[arow];
7600e36024fSHong Zhang       ajj  = aj + ai[arow];
7610e36024fSHong Zhang       for (k=0;k<anzj;k++) {
7620e36024fSHong Zhang         if (!ptadenserow[ajj[k]]) {
7630e36024fSHong Zhang           ptadenserow[ajj[k]]    = -1;
7640e36024fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
7650e36024fSHong Zhang         }
7660e36024fSHong Zhang       }
7670e36024fSHong Zhang     }
7680e36024fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7690e36024fSHong Zhang     ptaj = ptasparserow;
7700e36024fSHong Zhang     cnzi   = 0;
7710e36024fSHong Zhang     for (j=0;j<ptanzi;j++) {
7720e36024fSHong Zhang       prow = *ptaj++;
7730e36024fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
7740e36024fSHong Zhang       pjj  = pj + pi[prow];
7750e36024fSHong Zhang       for (k=0;k<pnzj;k++) {
7760e36024fSHong Zhang         if (!denserow[pjj[k]]) {
7770e36024fSHong Zhang             denserow[pjj[k]]  = -1;
7780e36024fSHong Zhang             sparserow[cnzi++] = pjj[k];
7790e36024fSHong Zhang         }
7800e36024fSHong Zhang       }
7810e36024fSHong Zhang     }
7820e36024fSHong Zhang 
7830e36024fSHong Zhang     /* sort sparserow */
7840e36024fSHong Zhang     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
7850e36024fSHong Zhang 
7860e36024fSHong Zhang     /* If free space is not available, make more free space */
7870e36024fSHong Zhang     /* Double the amount of total space in the list */
7880e36024fSHong Zhang     if (current_space->local_remaining<cnzi) {
7890e36024fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
7900e36024fSHong Zhang     }
7910e36024fSHong Zhang 
7920e36024fSHong Zhang     /* Copy data into free space, and zero out denserows */
7930e36024fSHong Zhang     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
7940e36024fSHong Zhang     current_space->array           += cnzi;
7950e36024fSHong Zhang     current_space->local_used      += cnzi;
7960e36024fSHong Zhang     current_space->local_remaining -= cnzi;
7970e36024fSHong Zhang 
7980e36024fSHong Zhang     for (j=0;j<ptanzi;j++) {
7990e36024fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
8000e36024fSHong Zhang     }
8010e36024fSHong Zhang     for (j=0;j<cnzi;j++) {
8020e36024fSHong Zhang       denserow[sparserow[j]] = 0;
8030e36024fSHong Zhang     }
8040e36024fSHong Zhang       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
8050e36024fSHong Zhang       /*        For now, we will recompute what is needed. */
8060e36024fSHong Zhang     ci[i+1] = ci[i] + cnzi;
8070e36024fSHong Zhang   }
8080e36024fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
8090e36024fSHong Zhang   /* Allocate space for cj, initialize cj, and */
8100e36024fSHong Zhang   /* destroy list of free space and other temporary array(s) */
8110e36024fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
8120e36024fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
8130e36024fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
8140e36024fSHong Zhang 
8150e36024fSHong Zhang   /* Allocate space for ca */
8160e36024fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
8170e36024fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
8180e36024fSHong Zhang 
8190e36024fSHong Zhang   /* put together the new matrix */
8200e36024fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
8210e36024fSHong Zhang 
8220e36024fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
8230e36024fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
8240e36024fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
8250e36024fSHong Zhang   c->freedata = PETSC_TRUE;
8260e36024fSHong Zhang   c->nonew    = 0;
8270e36024fSHong Zhang 
8280e36024fSHong Zhang   /* Clean up. */
8290e36024fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
8300e36024fSHong Zhang 
8310e36024fSHong Zhang   PetscFunctionReturn(0);
8320e36024fSHong Zhang }
8330e36024fSHong Zhang 
8340e36024fSHong Zhang #undef __FUNCT__
8350e36024fSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
8360e36024fSHong Zhang PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
8370e36024fSHong Zhang {
8380e36024fSHong Zhang   PetscErrorCode ierr;
8390e36024fSHong Zhang   PetscFunctionBegin;
8400e36024fSHong Zhang   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
8410e36024fSHong Zhang   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
8420e36024fSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
8430e36024fSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
8440e36024fSHong Zhang   }
8450e36024fSHong Zhang 
8460e36024fSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
8470e36024fSHong Zhang 
8480e36024fSHong Zhang   PetscFunctionReturn(0);
8490e36024fSHong Zhang }
8500e36024fSHong Zhang 
851