xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 20d4747c175c8ac2a5a30573cec8e8dd5f35f45e)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9eb9c0419SKris Buschelman 
10eb9c0419SKris Buschelman #undef __FUNCT__
119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
124d3841fdSKris Buschelman /*@
139af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
144d3841fdSKris Buschelman 
154d3841fdSKris Buschelman    Collective on Mat
164d3841fdSKris Buschelman 
174d3841fdSKris Buschelman    Input Parameters:
184d3841fdSKris Buschelman +  A - the matrix
19f747e1aeSHong Zhang .  P - the projection matrix
20f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
224d3841fdSKris Buschelman 
234d3841fdSKris Buschelman    Output Parameters:
244d3841fdSKris Buschelman .  C - the product matrix
254d3841fdSKris Buschelman 
264d3841fdSKris Buschelman    Notes:
274d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
284d3841fdSKris Buschelman 
294d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
304d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
314d3841fdSKris Buschelman 
324d3841fdSKris Buschelman    Level: intermediate
334d3841fdSKris Buschelman 
349af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
354d3841fdSKris Buschelman @*/
36dfbe8321SBarry Smith PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37dfbe8321SBarry Smith   PetscErrorCode ierr;
38534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40eb9c0419SKris Buschelman 
41eb9c0419SKris Buschelman   PetscFunctionBegin;
429af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
439af31e4aSHong Zhang   PetscValidType(A,1);
449af31e4aSHong Zhang   MatPreallocated(A);
459af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
469af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
479af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
489af31e4aSHong Zhang   PetscValidType(P,2);
499af31e4aSHong Zhang   MatPreallocated(P);
509af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
529af31e4aSHong Zhang   PetscValidPointer(C,3);
53534c1384SKris Buschelman 
549af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55eb9c0419SKris Buschelman 
569af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57eb9c0419SKris Buschelman 
58534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
59534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60534c1384SKris Buschelman   fA = A->ops->ptap;
61534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62534c1384SKris Buschelman   fP = P->ops->ptap;
63534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65534c1384SKris Buschelman 
669af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
689af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69eb9c0419SKris Buschelman   PetscFunctionReturn(0);
70eb9c0419SKris Buschelman }
71eb9c0419SKris Buschelman 
720e36024fSHong Zhang EXTERN PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,MatReuse,PetscReal,int,int,Mat*);
730e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,PetscReal,int,int,Mat*);
740e36024fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat,Mat,int,int,Mat);
75eb9c0419SKris Buschelman #undef __FUNCT__
76ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
77ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
78ff134f7aSHong Zhang {
79ff134f7aSHong Zhang   PetscErrorCode    ierr;
80*20d4747cSHong Zhang   Mat               AP,P_seq,A_loc,C_seq;
81ff134f7aSHong Zhang   Mat_MatMatMultMPI *mult;
820e36024fSHong Zhang   int               prstart,prend,m=P->m;
830e36024fSHong Zhang   int               rank,prid=10;
84ff134f7aSHong Zhang 
85ff134f7aSHong Zhang   PetscFunctionBegin;
86ff134f7aSHong Zhang   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
87ff134f7aSHong Zhang 
880e36024fSHong Zhang   /* compute symbolic and numeric AP = A*P */
890e36024fSHong Zhang   ierr = MatMatMult_MPIAIJ_MPIAIJ(A,P,scall,fill,&AP);CHKERRQ(ierr);
900e36024fSHong Zhang   mult = (Mat_MatMatMultMPI*)AP->spptr;
910e36024fSHong Zhang   P_seq = mult->bseq[0]; /* = submatrix of P by taking rows of P that equal to nonzero col of A */
920e36024fSHong Zhang 
930e36024fSHong Zhang   if (rank == prid){
940e36024fSHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_loc: %d, %d\n",rank,A_loc->m,A_loc->n);CHKERRQ(ierr);
950e36024fSHong Zhang     ierr = MatView(A_loc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
960e36024fSHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] P_seq: %d, %d\n",rank,P_seq->m,P_seq->n);CHKERRQ(ierr);
970e36024fSHong Zhang     ierr = MatView(P_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
98*20d4747cSHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF," [%d]  AP_seq: %d, %d\n",rank,AP->m,AP->n);
99*20d4747cSHong Zhang     ierr = MatView(AP,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
1000e36024fSHong Zhang   }
1010e36024fSHong Zhang 
102ff134f7aSHong Zhang   prstart = mult->brstart;
103ff134f7aSHong Zhang   prend   = prstart + m;
104ff134f7aSHong Zhang 
105*20d4747cSHong Zhang   A_loc = mult->aseq[0]; /* = submatrix of A by taking all local rows of A */
106*20d4747cSHong Zhang   ierr = MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(A_loc,P_seq,scall,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
1070e36024fSHong Zhang 
1080e36024fSHong Zhang   /* add C_seq into C */
1090e36024fSHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,scall,C);CHKERRQ(ierr);
1100e36024fSHong Zhang 
1110e36024fSHong Zhang   /* clean up */
1120e36024fSHong Zhang   ierr = MatDestroy(AP);CHKERRQ(ierr);
1130e36024fSHong Zhang   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
1140e36024fSHong Zhang 
115ff134f7aSHong Zhang   PetscFunctionReturn(0);
116ff134f7aSHong Zhang }
117ff134f7aSHong Zhang 
118ff134f7aSHong Zhang #undef __FUNCT__
119ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
120ff134f7aSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
121ff134f7aSHong Zhang {
122ff134f7aSHong Zhang 
123ff134f7aSHong Zhang   PetscFunctionBegin;
124ff134f7aSHong Zhang   PetscFunctionReturn(0);
125ff134f7aSHong Zhang }
126ff134f7aSHong Zhang 
127ff134f7aSHong Zhang #undef __FUNCT__
128ff134f7aSHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
129ff134f7aSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
130ff134f7aSHong Zhang {
131ff134f7aSHong Zhang 
132ff134f7aSHong Zhang   PetscFunctionBegin;
133ff134f7aSHong Zhang   PetscFunctionReturn(0);
134ff134f7aSHong Zhang }
135ff134f7aSHong Zhang 
136ff134f7aSHong Zhang #undef __FUNCT__
1379af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
138dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
1399af31e4aSHong Zhang {
140dfbe8321SBarry Smith   PetscErrorCode ierr;
1419af31e4aSHong Zhang   PetscFunctionBegin;
1429af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
143d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1449af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
145d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1469af31e4aSHong Zhang   }
147d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1489af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
149d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1509af31e4aSHong Zhang   PetscFunctionReturn(0);
1519af31e4aSHong Zhang }
1529af31e4aSHong Zhang 
1539af31e4aSHong Zhang #undef __FUNCT__
1549af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
1556849ba73SBarry Smith /*
1569af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
1574d3841fdSKris Buschelman 
1584d3841fdSKris Buschelman    Collective on Mat
1594d3841fdSKris Buschelman 
1604d3841fdSKris Buschelman    Input Parameters:
1614d3841fdSKris Buschelman +  A - the matrix
1624d3841fdSKris Buschelman -  P - the projection matrix
1634d3841fdSKris Buschelman 
1644d3841fdSKris Buschelman    Output Parameters:
1654d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
1664d3841fdSKris Buschelman 
1674d3841fdSKris Buschelman    Notes:
1684d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
1694d3841fdSKris Buschelman 
1704d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
1714d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
1729af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
1734d3841fdSKris Buschelman 
1744d3841fdSKris Buschelman    Level: intermediate
1754d3841fdSKris Buschelman 
1769af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
1776849ba73SBarry Smith */
178dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
179dfbe8321SBarry Smith   PetscErrorCode ierr;
180534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
181534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
182eb9c0419SKris Buschelman 
183eb9c0419SKris Buschelman   PetscFunctionBegin;
184eb9c0419SKris Buschelman 
1854482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
186c9780b6fSBarry Smith   PetscValidType(A,1);
187eb9c0419SKris Buschelman   MatPreallocated(A);
188eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
189eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
190eb9c0419SKris Buschelman 
1914482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
192c9780b6fSBarry Smith   PetscValidType(P,2);
193eb9c0419SKris Buschelman   MatPreallocated(P);
194eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
195eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
196eb9c0419SKris Buschelman 
1974482741eSBarry Smith   PetscValidPointer(C,3);
1984482741eSBarry Smith 
199eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
200eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
201eb9c0419SKris Buschelman 
202534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
203534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
204534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
205534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
206534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
207534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
208534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2094d3841fdSKris Buschelman 
210534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
211534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
212534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
213eb9c0419SKris Buschelman 
214eb9c0419SKris Buschelman   PetscFunctionReturn(0);
215eb9c0419SKris Buschelman }
216eb9c0419SKris Buschelman 
217f747e1aeSHong Zhang typedef struct {
218f747e1aeSHong Zhang   Mat    symAP;
219f747e1aeSHong Zhang } Mat_PtAPstruct;
220f747e1aeSHong Zhang 
22178a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
22278a80504SBarry Smith 
223f747e1aeSHong Zhang #undef __FUNCT__
224f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
225f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
226f747e1aeSHong Zhang {
227f4a850bbSBarry Smith   PetscErrorCode    ierr;
228f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
229f747e1aeSHong Zhang 
230f747e1aeSHong Zhang   PetscFunctionBegin;
231f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
232f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
23378a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
234f747e1aeSHong Zhang   PetscFunctionReturn(0);
235f747e1aeSHong Zhang }
236f747e1aeSHong Zhang 
237eb9c0419SKris Buschelman #undef __FUNCT__
2389af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
239dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
240dfbe8321SBarry Smith   PetscErrorCode ierr;
241d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
242d20bfe6fSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
243d20bfe6fSHong Zhang   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
244d20bfe6fSHong Zhang   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
245d20bfe6fSHong Zhang   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
246d20bfe6fSHong Zhang   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
247d20bfe6fSHong Zhang   MatScalar      *ca;
248eb9c0419SKris Buschelman 
249eb9c0419SKris Buschelman   PetscFunctionBegin;
250d20bfe6fSHong Zhang   /* Get ij structure of P^T */
251eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
252d20bfe6fSHong Zhang   ptJ=ptj;
253eb9c0419SKris Buschelman 
254d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
255d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
256d20bfe6fSHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
257d20bfe6fSHong Zhang   ci[0] = 0;
258eb9c0419SKris Buschelman 
259d20bfe6fSHong Zhang   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
260d20bfe6fSHong Zhang   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
261d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
262d20bfe6fSHong Zhang   denserow     = ptasparserow + an;
263d20bfe6fSHong Zhang   sparserow    = denserow     + pn;
264eb9c0419SKris Buschelman 
265d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
266d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
267d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
268d20bfe6fSHong Zhang   current_space = free_space;
269d20bfe6fSHong Zhang 
270d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
271d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
272d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
273d20bfe6fSHong Zhang     ptanzi = 0;
274d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
275d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
276d20bfe6fSHong Zhang       arow = *ptJ++;
277d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
278d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
279d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
280d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
281d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
282d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
283d20bfe6fSHong Zhang         }
284d20bfe6fSHong Zhang       }
285d20bfe6fSHong Zhang     }
286d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
287d20bfe6fSHong Zhang     ptaj = ptasparserow;
288d20bfe6fSHong Zhang     cnzi   = 0;
289d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
290d20bfe6fSHong Zhang       prow = *ptaj++;
291d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
292d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
293d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
294d20bfe6fSHong Zhang         if (!denserow[pjj[k]]) {
295d20bfe6fSHong Zhang             denserow[pjj[k]]  = -1;
296d20bfe6fSHong Zhang             sparserow[cnzi++] = pjj[k];
297d20bfe6fSHong Zhang         }
298d20bfe6fSHong Zhang       }
299d20bfe6fSHong Zhang     }
300d20bfe6fSHong Zhang 
301d20bfe6fSHong Zhang     /* sort sparserow */
302d20bfe6fSHong Zhang     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
303d20bfe6fSHong Zhang 
304d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
305d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
306d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
307d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
308d20bfe6fSHong Zhang     }
309d20bfe6fSHong Zhang 
310d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
311d20bfe6fSHong Zhang     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
312d20bfe6fSHong Zhang     current_space->array           += cnzi;
313d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
314d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
315d20bfe6fSHong Zhang 
316d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
317d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
318d20bfe6fSHong Zhang     }
319d20bfe6fSHong Zhang     for (j=0;j<cnzi;j++) {
320d20bfe6fSHong Zhang       denserow[sparserow[j]] = 0;
321d20bfe6fSHong Zhang     }
322d20bfe6fSHong Zhang       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
323d20bfe6fSHong Zhang       /*        For now, we will recompute what is needed. */
324d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
325d20bfe6fSHong Zhang   }
326d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
327d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
328d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
329d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
330d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
331d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
332d20bfe6fSHong Zhang 
333d20bfe6fSHong Zhang   /* Allocate space for ca */
334d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
335d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
336d20bfe6fSHong Zhang 
337d20bfe6fSHong Zhang   /* put together the new matrix */
338d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
339d20bfe6fSHong Zhang 
340d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
341d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
342d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
343d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
344d20bfe6fSHong Zhang   c->nonew    = 0;
345d20bfe6fSHong Zhang 
346d20bfe6fSHong Zhang   /* Clean up. */
347d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
348eb9c0419SKris Buschelman 
349eb9c0419SKris Buschelman   PetscFunctionReturn(0);
350eb9c0419SKris Buschelman }
351eb9c0419SKris Buschelman 
3523985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
3533985e5eaSKris Buschelman EXTERN_C_BEGIN
3543985e5eaSKris Buschelman #undef __FUNCT__
3559af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
356dfbe8321SBarry Smith PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
3575c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
358dfbe8321SBarry Smith   PetscErrorCode ierr;
3593985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
3603985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
3613985e5eaSKris Buschelman   Mat            P=pp->AIJ;
3623985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
3633985e5eaSKris Buschelman   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
3643985e5eaSKris Buschelman   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
3653985e5eaSKris Buschelman   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
366fe05a634SKris Buschelman   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
3673985e5eaSKris Buschelman   MatScalar      *ca;
3683985e5eaSKris Buschelman 
3693985e5eaSKris Buschelman   PetscFunctionBegin;
3703985e5eaSKris Buschelman   /* Start timer */
3719af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
3723985e5eaSKris Buschelman 
3733985e5eaSKris Buschelman   /* Get ij structure of P^T */
3743985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
3753985e5eaSKris Buschelman 
3763985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
3773985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
3783985e5eaSKris Buschelman   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
3793985e5eaSKris Buschelman   ci[0] = 0;
3803985e5eaSKris Buschelman 
3813985e5eaSKris Buschelman   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
3823985e5eaSKris Buschelman   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
3833985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
3843985e5eaSKris Buschelman   denserow     = ptasparserow + an;
3853985e5eaSKris Buschelman   sparserow    = denserow     + pn;
3863985e5eaSKris Buschelman 
3873985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3883985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3893985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
3903985e5eaSKris Buschelman   current_space = free_space;
3913985e5eaSKris Buschelman 
3923985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
3933985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
3943985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
3953985e5eaSKris Buschelman     ptanzi = 0;
3963985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
3973985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
3983985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
3993985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4003985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4013985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4023985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4033985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4043985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4053985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4063985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4073985e5eaSKris Buschelman           }
4083985e5eaSKris Buschelman         }
4093985e5eaSKris Buschelman       }
4103985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4113985e5eaSKris Buschelman       ptaj = ptasparserow;
4123985e5eaSKris Buschelman       cnzi   = 0;
4133985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
414fe05a634SKris Buschelman         pdof = *ptaj%dof;
4153985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4163985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4173985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4183985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
419fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
420fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
421fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4223985e5eaSKris Buschelman           }
4233985e5eaSKris Buschelman         }
4243985e5eaSKris Buschelman       }
4253985e5eaSKris Buschelman 
4263985e5eaSKris Buschelman       /* sort sparserow */
4273985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
4283985e5eaSKris Buschelman 
4293985e5eaSKris Buschelman       /* If free space is not available, make more free space */
4303985e5eaSKris Buschelman       /* Double the amount of total space in the list */
4313985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
4323985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4333985e5eaSKris Buschelman       }
4343985e5eaSKris Buschelman 
4353985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
4363985e5eaSKris Buschelman       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
4373985e5eaSKris Buschelman       current_space->array           += cnzi;
4383985e5eaSKris Buschelman       current_space->local_used      += cnzi;
4393985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
4403985e5eaSKris Buschelman 
4413985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
4423985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
4433985e5eaSKris Buschelman       }
4443985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
4453985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
4463985e5eaSKris Buschelman       }
4473985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
4483985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
4493985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
4503985e5eaSKris Buschelman     }
4513985e5eaSKris Buschelman   }
4523985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
4533985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
4543985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
4553985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
4563985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
4573985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
4583985e5eaSKris Buschelman 
4593985e5eaSKris Buschelman   /* Allocate space for ca */
4603985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
4613985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
4623985e5eaSKris Buschelman 
4633985e5eaSKris Buschelman   /* put together the new matrix */
4643985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
4653985e5eaSKris Buschelman 
4663985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4673985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
4683985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
4693985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
4703985e5eaSKris Buschelman   c->nonew    = 0;
4713985e5eaSKris Buschelman 
4723985e5eaSKris Buschelman   /* Clean up. */
4733985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4743985e5eaSKris Buschelman 
4759af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4763985e5eaSKris Buschelman   PetscFunctionReturn(0);
4773985e5eaSKris Buschelman }
4783985e5eaSKris Buschelman EXTERN_C_END
4793985e5eaSKris Buschelman 
480eb9c0419SKris Buschelman #undef __FUNCT__
4819af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
4826849ba73SBarry Smith /*
4839af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
4844d3841fdSKris Buschelman 
4854d3841fdSKris Buschelman    Collective on Mat
4864d3841fdSKris Buschelman 
4874d3841fdSKris Buschelman    Input Parameters:
4884d3841fdSKris Buschelman +  A - the matrix
4894d3841fdSKris Buschelman -  P - the projection matrix
4904d3841fdSKris Buschelman 
4914d3841fdSKris Buschelman    Output Parameters:
4924d3841fdSKris Buschelman .  C - the product matrix
4934d3841fdSKris Buschelman 
4944d3841fdSKris Buschelman    Notes:
4959af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
4964d3841fdSKris Buschelman    the user using MatDeatroy().
4974d3841fdSKris Buschelman 
498170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
499170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5004d3841fdSKris Buschelman 
5014d3841fdSKris Buschelman    Level: intermediate
5024d3841fdSKris Buschelman 
5039af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5046849ba73SBarry Smith */
505dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
506dfbe8321SBarry Smith   PetscErrorCode ierr;
507534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
508534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
509eb9c0419SKris Buschelman 
510eb9c0419SKris Buschelman   PetscFunctionBegin;
511eb9c0419SKris Buschelman 
5124482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
513c9780b6fSBarry Smith   PetscValidType(A,1);
514eb9c0419SKris Buschelman   MatPreallocated(A);
515eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
516eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
517eb9c0419SKris Buschelman 
5184482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
519c9780b6fSBarry Smith   PetscValidType(P,2);
520eb9c0419SKris Buschelman   MatPreallocated(P);
521eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
522eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
523eb9c0419SKris Buschelman 
5244482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
525c9780b6fSBarry Smith   PetscValidType(C,3);
526eb9c0419SKris Buschelman   MatPreallocated(C);
527eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
528eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
529eb9c0419SKris Buschelman 
530eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
531eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
532eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
533eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
534eb9c0419SKris Buschelman 
535534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
536534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
537534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
538534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
539534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
540534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
541534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5424d3841fdSKris Buschelman 
543534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
544534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
545534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
546eb9c0419SKris Buschelman 
547eb9c0419SKris Buschelman   PetscFunctionReturn(0);
548eb9c0419SKris Buschelman }
549eb9c0419SKris Buschelman 
550eb9c0419SKris Buschelman #undef __FUNCT__
5519af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
552dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
553dfbe8321SBarry Smith {
554dfbe8321SBarry Smith   PetscErrorCode ierr;
555d20bfe6fSHong Zhang   int            flops=0;
556d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
557d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
558d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
559d20bfe6fSHong Zhang   int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
560d20bfe6fSHong Zhang   int            *ci=c->i,*cj=c->j,*cjj;
561d20bfe6fSHong Zhang   int            am=A->M,cn=C->N,cm=C->M;
562d20bfe6fSHong Zhang   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
563d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
564eb9c0419SKris Buschelman 
565eb9c0419SKris Buschelman   PetscFunctionBegin;
566d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
567d20bfe6fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
568d20bfe6fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
569eb9c0419SKris Buschelman 
570d20bfe6fSHong Zhang   apj      = (int *)(apa + cn);
571d20bfe6fSHong Zhang   apjdense = apj + cn;
572d20bfe6fSHong Zhang 
573d20bfe6fSHong Zhang   /* Clear old values in C */
574d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
575d20bfe6fSHong Zhang 
576d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
577d20bfe6fSHong Zhang     /* Form sparse row of A*P */
578d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
579d20bfe6fSHong Zhang     apnzj = 0;
580d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
581d20bfe6fSHong Zhang       prow = *aj++;
582d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
583d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
584d20bfe6fSHong Zhang       paj  = pa + pi[prow];
585d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
586d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
587d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
588d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
589d20bfe6fSHong Zhang         }
590d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
591d20bfe6fSHong Zhang       }
592d20bfe6fSHong Zhang       flops += 2*pnzj;
593d20bfe6fSHong Zhang       aa++;
594d20bfe6fSHong Zhang     }
595d20bfe6fSHong Zhang 
596d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
597d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
598d20bfe6fSHong Zhang 
599d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
600d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
601d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
602d20bfe6fSHong Zhang       nextap = 0;
603d20bfe6fSHong Zhang       crow   = *pJ++;
604d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
605d20bfe6fSHong Zhang       caj    = ca + ci[crow];
606d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
607d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
608d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
609d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
610d20bfe6fSHong Zhang         }
611d20bfe6fSHong Zhang       }
612d20bfe6fSHong Zhang       flops += 2*apnzj;
613d20bfe6fSHong Zhang       pA++;
614d20bfe6fSHong Zhang     }
615d20bfe6fSHong Zhang 
616d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
617d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
618d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
619d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
620d20bfe6fSHong Zhang     }
621d20bfe6fSHong Zhang   }
622d20bfe6fSHong Zhang 
623d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
624d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
625d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
626d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
627d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
628d20bfe6fSHong Zhang 
629eb9c0419SKris Buschelman   PetscFunctionReturn(0);
630eb9c0419SKris Buschelman }
6310e36024fSHong Zhang 
6320e36024fSHong Zhang /* Compute C = P[rstart:rend,:]^T * A * P of seqaij matrices - used by MatPtAP_MPIAIJ_MPIAIJ() */
6330e36024fSHong Zhang 
6340e36024fSHong Zhang #undef __FUNCT__
6350e36024fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt"
6360e36024fSHong Zhang PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,int prstart,int prend,Mat C)
6370e36024fSHong Zhang {
6380e36024fSHong Zhang   PetscErrorCode ierr;
6390e36024fSHong Zhang   int            flops=0;
6400e36024fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
6410e36024fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
6420e36024fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
643*20d4747cSHong Zhang   int            *ai=a->i,*aj=a->j,*apj,*apjdense;
644*20d4747cSHong Zhang   int            *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
6450e36024fSHong Zhang   int            *ci=c->i,*cj=c->j,*cjj;
6460e36024fSHong Zhang   int            am=A->M,cn=C->N,cm=C->M;
6470e36024fSHong Zhang   int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
6480e36024fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
6490e36024fSHong Zhang 
6500e36024fSHong Zhang   PetscFunctionBegin;
6510e36024fSHong Zhang   pA=p->a+pi[prstart];
6520e36024fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
6530e36024fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
6540e36024fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
6550e36024fSHong Zhang 
6560e36024fSHong Zhang   apj      = (int *)(apa + cn);
6570e36024fSHong Zhang   apjdense = apj + cn;
6580e36024fSHong Zhang 
6590e36024fSHong Zhang   /* Clear old values in C */
6600e36024fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
6610e36024fSHong Zhang 
6620e36024fSHong Zhang   for (i=0;i<am;i++) {
6630e36024fSHong Zhang     /* Form sparse row of A*P */
6640e36024fSHong Zhang     anzi  = ai[i+1] - ai[i];
6650e36024fSHong Zhang     apnzj = 0;
6660e36024fSHong Zhang     for (j=0;j<anzi;j++) {
6670e36024fSHong Zhang       prow = *aj++;
6680e36024fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
6690e36024fSHong Zhang       pjj  = pj + pi[prow];
6700e36024fSHong Zhang       paj  = pa + pi[prow];
6710e36024fSHong Zhang       for (k=0;k<pnzj;k++) {
6720e36024fSHong Zhang         if (!apjdense[pjj[k]]) {
6730e36024fSHong Zhang           apjdense[pjj[k]] = -1;
6740e36024fSHong Zhang           apj[apnzj++]     = pjj[k];
6750e36024fSHong Zhang         }
6760e36024fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
6770e36024fSHong Zhang       }
6780e36024fSHong Zhang       flops += 2*pnzj;
6790e36024fSHong Zhang       aa++;
6800e36024fSHong Zhang     }
6810e36024fSHong Zhang 
6820e36024fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
6830e36024fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
6840e36024fSHong Zhang 
6850e36024fSHong Zhang     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
6860e36024fSHong Zhang     pnzi = pi[i+1+prstart] - pi[i+prstart];
6870e36024fSHong Zhang     for (j=0;j<pnzi;j++) {
6880e36024fSHong Zhang       nextap = 0;
6890e36024fSHong Zhang       crow   = *pJ++;
6900e36024fSHong Zhang       cjj    = cj + ci[crow];
6910e36024fSHong Zhang       caj    = ca + ci[crow];
6920e36024fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
6930e36024fSHong Zhang       for (k=0;nextap<apnzj;k++) {
6940e36024fSHong Zhang         if (cjj[k]==apj[nextap]) {
6950e36024fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
6960e36024fSHong Zhang         }
6970e36024fSHong Zhang       }
6980e36024fSHong Zhang       flops += 2*apnzj;
6990e36024fSHong Zhang       pA++;
7000e36024fSHong Zhang     }
7010e36024fSHong Zhang 
7020e36024fSHong Zhang     /* Zero the current row info for A*P */
7030e36024fSHong Zhang     for (j=0;j<apnzj;j++) {
7040e36024fSHong Zhang       apa[apj[j]]      = 0.;
7050e36024fSHong Zhang       apjdense[apj[j]] = 0;
7060e36024fSHong Zhang     }
7070e36024fSHong Zhang   }
7080e36024fSHong Zhang 
7090e36024fSHong Zhang   /* Assemble the final matrix and clean up */
7100e36024fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7110e36024fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
7120e36024fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
7130e36024fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
7140e36024fSHong Zhang 
7150e36024fSHong Zhang   PetscFunctionReturn(0);
7160e36024fSHong Zhang }
7170e36024fSHong Zhang 
7180e36024fSHong Zhang #undef __FUNCT__
7190e36024fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt"
7200e36024fSHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,PetscReal fill,int prstart,int prend,Mat *C) {
7210e36024fSHong Zhang   PetscErrorCode ierr;
7220e36024fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
7230e36024fSHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
7240e36024fSHong Zhang   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
7250e36024fSHong Zhang   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
7260e36024fSHong Zhang   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
7270e36024fSHong Zhang   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
7280e36024fSHong Zhang   MatScalar      *ca;
7290e36024fSHong Zhang   Mat *psub,P_sub;
7300e36024fSHong Zhang   IS  isrow,iscol;
7310e36024fSHong Zhang   int m = prend - prstart;
7320b89d903Svictorle 
7330b89d903Svictorle   PetscFunctionBegin;
7340b89d903Svictorle   /* Get ij structure of P[rstart:rend,:]^T */
7350e36024fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
7360e36024fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->n,0,1,&iscol);CHKERRQ(ierr);
7370e36024fSHong Zhang   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psub);CHKERRQ(ierr);
7380e36024fSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
7390e36024fSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
7400e36024fSHong Zhang   P_sub = psub[0];
7410e36024fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_sub,&pti,&ptj);CHKERRQ(ierr);
7420e36024fSHong Zhang   ierr = MatDestroyMatrices(1,&psub);CHKERRQ(ierr);
7430e36024fSHong Zhang   ptJ=ptj;
7440e36024fSHong Zhang 
7450e36024fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
7460e36024fSHong Zhang   /* free space for accumulating nonzero column info */
7470e36024fSHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
7480e36024fSHong Zhang   ci[0] = 0;
7490e36024fSHong Zhang 
7500e36024fSHong Zhang   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
7510e36024fSHong Zhang   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
7520e36024fSHong Zhang   ptasparserow = ptadenserow  + an;
7530e36024fSHong Zhang   denserow     = ptasparserow + an;
7540e36024fSHong Zhang   sparserow    = denserow     + pn;
7550e36024fSHong Zhang 
7560e36024fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
7570e36024fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
7580e36024fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
7590e36024fSHong Zhang   current_space = free_space;
7600e36024fSHong Zhang 
7610e36024fSHong Zhang   /* Determine symbolic info for each row of C: */
7620e36024fSHong Zhang   for (i=0;i<pn;i++) {
7630e36024fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
7640e36024fSHong Zhang     ptanzi = 0;
7650e36024fSHong Zhang     /* Determine symbolic row of PtA_reduced: */
7660e36024fSHong Zhang     for (j=0;j<ptnzi;j++) {
7670e36024fSHong Zhang       arow = *ptJ++;
7680e36024fSHong Zhang       anzj = ai[arow+1] - ai[arow];
7690e36024fSHong Zhang       ajj  = aj + ai[arow];
7700e36024fSHong Zhang       for (k=0;k<anzj;k++) {
7710e36024fSHong Zhang         if (!ptadenserow[ajj[k]]) {
7720e36024fSHong Zhang           ptadenserow[ajj[k]]    = -1;
7730e36024fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
7740e36024fSHong Zhang         }
7750e36024fSHong Zhang       }
7760e36024fSHong Zhang     }
7770e36024fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7780e36024fSHong Zhang     ptaj = ptasparserow;
7790e36024fSHong Zhang     cnzi   = 0;
7800e36024fSHong Zhang     for (j=0;j<ptanzi;j++) {
7810e36024fSHong Zhang       prow = *ptaj++;
7820e36024fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
7830e36024fSHong Zhang       pjj  = pj + pi[prow];
7840e36024fSHong Zhang       for (k=0;k<pnzj;k++) {
7850e36024fSHong Zhang         if (!denserow[pjj[k]]) {
7860e36024fSHong Zhang             denserow[pjj[k]]  = -1;
7870e36024fSHong Zhang             sparserow[cnzi++] = pjj[k];
7880e36024fSHong Zhang         }
7890e36024fSHong Zhang       }
7900e36024fSHong Zhang     }
7910e36024fSHong Zhang 
7920e36024fSHong Zhang     /* sort sparserow */
7930e36024fSHong Zhang     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
7940e36024fSHong Zhang 
7950e36024fSHong Zhang     /* If free space is not available, make more free space */
7960e36024fSHong Zhang     /* Double the amount of total space in the list */
7970e36024fSHong Zhang     if (current_space->local_remaining<cnzi) {
7980e36024fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
7990e36024fSHong Zhang     }
8000e36024fSHong Zhang 
8010e36024fSHong Zhang     /* Copy data into free space, and zero out denserows */
8020e36024fSHong Zhang     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
8030e36024fSHong Zhang     current_space->array           += cnzi;
8040e36024fSHong Zhang     current_space->local_used      += cnzi;
8050e36024fSHong Zhang     current_space->local_remaining -= cnzi;
8060e36024fSHong Zhang 
8070e36024fSHong Zhang     for (j=0;j<ptanzi;j++) {
8080e36024fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
8090e36024fSHong Zhang     }
8100e36024fSHong Zhang     for (j=0;j<cnzi;j++) {
8110e36024fSHong Zhang       denserow[sparserow[j]] = 0;
8120e36024fSHong Zhang     }
8130e36024fSHong Zhang       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
8140e36024fSHong Zhang       /*        For now, we will recompute what is needed. */
8150e36024fSHong Zhang     ci[i+1] = ci[i] + cnzi;
8160e36024fSHong Zhang   }
8170e36024fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
8180e36024fSHong Zhang   /* Allocate space for cj, initialize cj, and */
8190e36024fSHong Zhang   /* destroy list of free space and other temporary array(s) */
8200e36024fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
8210e36024fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
8220e36024fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
8230e36024fSHong Zhang 
8240e36024fSHong Zhang   /* Allocate space for ca */
8250e36024fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
8260e36024fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
8270e36024fSHong Zhang 
8280e36024fSHong Zhang   /* put together the new matrix */
8290e36024fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
8300e36024fSHong Zhang 
8310e36024fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
8320e36024fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
8330e36024fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
8340e36024fSHong Zhang   c->freedata = PETSC_TRUE;
8350e36024fSHong Zhang   c->nonew    = 0;
8360e36024fSHong Zhang 
8370e36024fSHong Zhang   /* Clean up. */
8380e36024fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
8390e36024fSHong Zhang 
8400e36024fSHong Zhang   PetscFunctionReturn(0);
8410e36024fSHong Zhang }
8420e36024fSHong Zhang 
8430e36024fSHong Zhang #undef __FUNCT__
8440e36024fSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ_ReducedPt"
8450e36024fSHong Zhang PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ_ReducedPt(Mat A,Mat P,MatReuse scall,PetscReal fill,int prstart,int prend,Mat *C)
8460e36024fSHong Zhang {
8470e36024fSHong Zhang   PetscErrorCode ierr;
8480e36024fSHong Zhang   PetscFunctionBegin;
8490e36024fSHong Zhang   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
8500e36024fSHong Zhang   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
8510e36024fSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
8520e36024fSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ_ReducedPt(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
8530e36024fSHong Zhang   }
8540e36024fSHong Zhang 
8550e36024fSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ_ReducedPt(A,P,prstart,prend,*C);CHKERRQ(ierr);
8560e36024fSHong Zhang 
8570e36024fSHong Zhang   PetscFunctionReturn(0);
8580e36024fSHong Zhang }
8590e36024fSHong Zhang 
860