xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 01b7ae997e37fe42a80da49e299d88681e557d57)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76e240928fSHong Zhang 
77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78a61c8c0fSHong Zhang #undef __FUNCT__
79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81a61c8c0fSHong Zhang {
82a61c8c0fSHong Zhang   PetscErrorCode       ierr;
8332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
84a61c8c0fSHong Zhang   PetscObjectContainer container;
85a61c8c0fSHong Zhang 
86a61c8c0fSHong Zhang   PetscFunctionBegin;
87a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88a61c8c0fSHong Zhang   if (container) {
89a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
9032fba14fSHong Zhang     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
9132fba14fSHong Zhang     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92*01b7ae99SHong Zhang     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93*01b7ae99SHong Zhang     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94a61c8c0fSHong Zhang 
95a61c8c0fSHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
9632fba14fSHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97a61c8c0fSHong Zhang   }
98a61c8c0fSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
99a61c8c0fSHong Zhang 
100a61c8c0fSHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101a61c8c0fSHong Zhang   PetscFunctionReturn(0);
102a61c8c0fSHong Zhang }
103a61c8c0fSHong Zhang 
104eb9c0419SKris Buschelman #undef __FUNCT__
105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
107ff134f7aSHong Zhang {
108ff134f7aSHong Zhang   PetscErrorCode       ierr;
10932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
110a61c8c0fSHong Zhang   PetscObjectContainer container;
111b90dcfe3SHong Zhang 
112b90dcfe3SHong Zhang   PetscFunctionBegin;
113b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1144c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
11532fba14fSHong Zhang     ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
11632fba14fSHong Zhang 
117e240928fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
118*01b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
119a61c8c0fSHong Zhang 
120e240928fSHong Zhang     /* get P_loc by taking all local rows of P */
12132fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
122e240928fSHong Zhang 
123a61c8c0fSHong Zhang     /* attach the supporting struct to P for reuse */
124a61c8c0fSHong Zhang     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
125a61c8c0fSHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
126a61c8c0fSHong Zhang     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
127a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
128e240928fSHong Zhang 
12932fba14fSHong Zhang     /* now, compute symbolic local P^T*A*P */
130a61c8c0fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
131a61c8c0fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
132a61c8c0fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
133a61c8c0fSHong Zhang     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
134a61c8c0fSHong Zhang     if (container) {
135a61c8c0fSHong Zhang       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
136a61c8c0fSHong Zhang     } else {
137a61c8c0fSHong Zhang       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
138a61c8c0fSHong Zhang     }
139a61c8c0fSHong Zhang 
140a61c8c0fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
141*01b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
142a61c8c0fSHong Zhang 
143a61c8c0fSHong Zhang     /* get P_loc by taking all local rows of P */
14432fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
14532fba14fSHong Zhang 
146a61c8c0fSHong Zhang   } else {
147a61c8c0fSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
148a61c8c0fSHong Zhang   }
14932fba14fSHong Zhang   /* now, compute numeric local P^T*A*P */
150a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
151a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
152a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
153a61c8c0fSHong Zhang 
154a61c8c0fSHong Zhang   PetscFunctionReturn(0);
155a61c8c0fSHong Zhang }
156a61c8c0fSHong Zhang 
157a61c8c0fSHong Zhang #undef __FUNCT__
158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160a61c8c0fSHong Zhang {
161a61c8c0fSHong Zhang   PetscErrorCode       ierr;
162a61c8c0fSHong Zhang   Mat                  C_seq;
16332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
164a61c8c0fSHong Zhang   PetscObjectContainer container;
165a61c8c0fSHong Zhang 
166a61c8c0fSHong Zhang   PetscFunctionBegin;
167a61c8c0fSHong Zhang 
168a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169a61c8c0fSHong Zhang   if (container) {
170a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171a61c8c0fSHong Zhang   } else {
172a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173a61c8c0fSHong Zhang   }
17425616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
17532fba14fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176a61c8c0fSHong Zhang 
177b90dcfe3SHong Zhang   /* add C_seq into mpi C */
178a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179b90dcfe3SHong Zhang 
180ff134f7aSHong Zhang   PetscFunctionReturn(0);
181ff134f7aSHong Zhang }
182ff134f7aSHong Zhang 
183ff134f7aSHong Zhang #undef __FUNCT__
184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
186ff134f7aSHong Zhang {
187b90dcfe3SHong Zhang   PetscErrorCode       ierr;
188671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
18932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
190a61c8c0fSHong Zhang   PetscObjectContainer cont_merge,cont_ptap;
191ff134f7aSHong Zhang 
192ff134f7aSHong Zhang   PetscFunctionBegin;
193a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
194a61c8c0fSHong Zhang   if (cont_merge) {
195a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
1967f79fd58SMatthew Knepley   } else {
197a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
198671beff6SHong Zhang   }
199a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
200a61c8c0fSHong Zhang   if (cont_ptap) {
201a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
202a61c8c0fSHong Zhang   } else {
203a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
204a61c8c0fSHong Zhang   }
20532fba14fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
206a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
207ff134f7aSHong Zhang   PetscFunctionReturn(0);
208ff134f7aSHong Zhang }
209ff134f7aSHong Zhang 
210ff134f7aSHong Zhang #undef __FUNCT__
2119af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
212dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
2139af31e4aSHong Zhang {
214dfbe8321SBarry Smith   PetscErrorCode ierr;
215b1d57f15SBarry Smith 
2169af31e4aSHong Zhang   PetscFunctionBegin;
2179af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
218d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2199af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
220d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2219af31e4aSHong Zhang   }
222d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2239af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
224d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2259af31e4aSHong Zhang   PetscFunctionReturn(0);
2269af31e4aSHong Zhang }
2279af31e4aSHong Zhang 
2286849ba73SBarry Smith /*
2299af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
2304d3841fdSKris Buschelman 
2314d3841fdSKris Buschelman    Collective on Mat
2324d3841fdSKris Buschelman 
2334d3841fdSKris Buschelman    Input Parameters:
2344d3841fdSKris Buschelman +  A - the matrix
2354d3841fdSKris Buschelman -  P - the projection matrix
2364d3841fdSKris Buschelman 
2374d3841fdSKris Buschelman    Output Parameters:
2384d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
2394d3841fdSKris Buschelman 
2404d3841fdSKris Buschelman    Notes:
2414d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
2424d3841fdSKris Buschelman 
2434d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
2444d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
2459af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
2464d3841fdSKris Buschelman 
2474d3841fdSKris Buschelman    Level: intermediate
2484d3841fdSKris Buschelman 
2499af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
2506849ba73SBarry Smith */
251e240928fSHong Zhang #undef __FUNCT__
252e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
25355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
25455a3bba9SHong Zhang {
255dfbe8321SBarry Smith   PetscErrorCode ierr;
256534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
257534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
258eb9c0419SKris Buschelman 
259eb9c0419SKris Buschelman   PetscFunctionBegin;
260eb9c0419SKris Buschelman 
2614482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
262c9780b6fSBarry Smith   PetscValidType(A,1);
263eb9c0419SKris Buschelman   MatPreallocated(A);
264eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
265eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
266eb9c0419SKris Buschelman 
2674482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
268c9780b6fSBarry Smith   PetscValidType(P,2);
269eb9c0419SKris Buschelman   MatPreallocated(P);
270eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
271eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
272eb9c0419SKris Buschelman 
2734482741eSBarry Smith   PetscValidPointer(C,3);
2744482741eSBarry Smith 
275eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
276eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
277eb9c0419SKris Buschelman 
278534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
279534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
280534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
281534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
282534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
283534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
284534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2854d3841fdSKris Buschelman 
286534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
287534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
288534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
289eb9c0419SKris Buschelman 
290eb9c0419SKris Buschelman   PetscFunctionReturn(0);
291eb9c0419SKris Buschelman }
292eb9c0419SKris Buschelman 
293f747e1aeSHong Zhang typedef struct {
294f747e1aeSHong Zhang   Mat    symAP;
295f747e1aeSHong Zhang } Mat_PtAPstruct;
296f747e1aeSHong Zhang 
29778a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
29878a80504SBarry Smith 
299f747e1aeSHong Zhang #undef __FUNCT__
300f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
301f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
302f747e1aeSHong Zhang {
303f4a850bbSBarry Smith   PetscErrorCode    ierr;
304f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
305f747e1aeSHong Zhang 
306f747e1aeSHong Zhang   PetscFunctionBegin;
307f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
308f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
30978a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
310f747e1aeSHong Zhang   PetscFunctionReturn(0);
311f747e1aeSHong Zhang }
312f747e1aeSHong Zhang 
313eb9c0419SKris Buschelman #undef __FUNCT__
3149af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
31555a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
31655a3bba9SHong Zhang {
317dfbe8321SBarry Smith   PetscErrorCode ierr;
318d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
319d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
32055a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
321b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
32255a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
323b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
324d20bfe6fSHong Zhang   MatScalar      *ca;
32555a3bba9SHong Zhang   PetscBT        lnkbt;
326eb9c0419SKris Buschelman 
327eb9c0419SKris Buschelman   PetscFunctionBegin;
328d20bfe6fSHong Zhang   /* Get ij structure of P^T */
329eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
330d20bfe6fSHong Zhang   ptJ=ptj;
331eb9c0419SKris Buschelman 
332d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
333d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
33455a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
335d20bfe6fSHong Zhang   ci[0] = 0;
336eb9c0419SKris Buschelman 
33755a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
33855a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
339d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
34055a3bba9SHong Zhang 
34155a3bba9SHong Zhang   /* create and initialize a linked list */
34255a3bba9SHong Zhang   nlnk = pn+1;
34355a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
344eb9c0419SKris Buschelman 
345d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
346d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
347d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
348d20bfe6fSHong Zhang   current_space = free_space;
349d20bfe6fSHong Zhang 
350d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
351d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
352d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
353d20bfe6fSHong Zhang     ptanzi = 0;
354d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
355d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
356d20bfe6fSHong Zhang       arow = *ptJ++;
357d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
358d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
359d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
360d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
361d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
362d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
363d20bfe6fSHong Zhang         }
364d20bfe6fSHong Zhang       }
365d20bfe6fSHong Zhang     }
366d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
367d20bfe6fSHong Zhang     ptaj = ptasparserow;
368d20bfe6fSHong Zhang     cnzi   = 0;
369d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
370d20bfe6fSHong Zhang       prow = *ptaj++;
371d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
372d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
37355a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
37455a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
37555a3bba9SHong Zhang       cnzi += nlnk;
376d20bfe6fSHong Zhang     }
377d20bfe6fSHong Zhang 
378d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
379d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
380d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
381d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
382d20bfe6fSHong Zhang     }
383d20bfe6fSHong Zhang 
384d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
38555a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
386d20bfe6fSHong Zhang     current_space->array           += cnzi;
387d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
388d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
389d20bfe6fSHong Zhang 
390d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
391d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
392d20bfe6fSHong Zhang     }
393d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
394d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
395d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
396d20bfe6fSHong Zhang   }
397d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
398d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
399d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
40055a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
401d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
402d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
40355a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
404d20bfe6fSHong Zhang 
405d20bfe6fSHong Zhang   /* Allocate space for ca */
406d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
407d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
408d20bfe6fSHong Zhang 
409d20bfe6fSHong Zhang   /* put together the new matrix */
410d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
411d20bfe6fSHong Zhang 
412d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
413d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
414d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
415d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
416d20bfe6fSHong Zhang   c->nonew    = 0;
417d20bfe6fSHong Zhang 
418d20bfe6fSHong Zhang   /* Clean up. */
419d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
420eb9c0419SKris Buschelman 
421eb9c0419SKris Buschelman   PetscFunctionReturn(0);
422eb9c0419SKris Buschelman }
423eb9c0419SKris Buschelman 
4243985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
4253985e5eaSKris Buschelman EXTERN_C_BEGIN
4263985e5eaSKris Buschelman #undef __FUNCT__
4279af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
42855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
42955a3bba9SHong Zhang {
4305c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
431dfbe8321SBarry Smith   PetscErrorCode ierr;
4323985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4333985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
4343985e5eaSKris Buschelman   Mat            P=pp->AIJ;
4353985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
436b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
437b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
438b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
439b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
4403985e5eaSKris Buschelman   MatScalar      *ca;
4413985e5eaSKris Buschelman 
4423985e5eaSKris Buschelman   PetscFunctionBegin;
4433985e5eaSKris Buschelman   /* Start timer */
4449af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4453985e5eaSKris Buschelman 
4463985e5eaSKris Buschelman   /* Get ij structure of P^T */
4473985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4483985e5eaSKris Buschelman 
4493985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
4503985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
451b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4523985e5eaSKris Buschelman   ci[0] = 0;
4533985e5eaSKris Buschelman 
454b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
455b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4563985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
4573985e5eaSKris Buschelman   denserow     = ptasparserow + an;
4583985e5eaSKris Buschelman   sparserow    = denserow     + pn;
4593985e5eaSKris Buschelman 
4603985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
4613985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
4623985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
4633985e5eaSKris Buschelman   current_space = free_space;
4643985e5eaSKris Buschelman 
4653985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
4663985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
4673985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
4683985e5eaSKris Buschelman     ptanzi = 0;
4693985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4703985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4713985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4723985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4733985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4743985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4753985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4763985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4773985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4783985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4793985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4803985e5eaSKris Buschelman           }
4813985e5eaSKris Buschelman         }
4823985e5eaSKris Buschelman       }
4833985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4843985e5eaSKris Buschelman       ptaj = ptasparserow;
4853985e5eaSKris Buschelman       cnzi   = 0;
4863985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
487fe05a634SKris Buschelman         pdof = *ptaj%dof;
4883985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4893985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4903985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4913985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
492fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
493fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
494fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4953985e5eaSKris Buschelman           }
4963985e5eaSKris Buschelman         }
4973985e5eaSKris Buschelman       }
4983985e5eaSKris Buschelman 
4993985e5eaSKris Buschelman       /* sort sparserow */
5003985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
5013985e5eaSKris Buschelman 
5023985e5eaSKris Buschelman       /* If free space is not available, make more free space */
5033985e5eaSKris Buschelman       /* Double the amount of total space in the list */
5043985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
5053985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
5063985e5eaSKris Buschelman       }
5073985e5eaSKris Buschelman 
5083985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
509b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
5103985e5eaSKris Buschelman       current_space->array           += cnzi;
5113985e5eaSKris Buschelman       current_space->local_used      += cnzi;
5123985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
5133985e5eaSKris Buschelman 
5143985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
5153985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
5163985e5eaSKris Buschelman       }
5173985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
5183985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
5193985e5eaSKris Buschelman       }
5203985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
5213985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
5223985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
5233985e5eaSKris Buschelman     }
5243985e5eaSKris Buschelman   }
5253985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
5263985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
5273985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
528b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5293985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
5303985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
5313985e5eaSKris Buschelman 
5323985e5eaSKris Buschelman   /* Allocate space for ca */
5333985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
5343985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
5353985e5eaSKris Buschelman 
5363985e5eaSKris Buschelman   /* put together the new matrix */
5373985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
5383985e5eaSKris Buschelman 
5393985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5403985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
5413985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
5423985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
5433985e5eaSKris Buschelman   c->nonew    = 0;
5443985e5eaSKris Buschelman 
5453985e5eaSKris Buschelman   /* Clean up. */
5463985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
5473985e5eaSKris Buschelman 
5489af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
5493985e5eaSKris Buschelman   PetscFunctionReturn(0);
5503985e5eaSKris Buschelman }
5513985e5eaSKris Buschelman EXTERN_C_END
5523985e5eaSKris Buschelman 
5536849ba73SBarry Smith /*
5549af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
5554d3841fdSKris Buschelman 
5564d3841fdSKris Buschelman    Collective on Mat
5574d3841fdSKris Buschelman 
5584d3841fdSKris Buschelman    Input Parameters:
5594d3841fdSKris Buschelman +  A - the matrix
5604d3841fdSKris Buschelman -  P - the projection matrix
5614d3841fdSKris Buschelman 
5624d3841fdSKris Buschelman    Output Parameters:
5634d3841fdSKris Buschelman .  C - the product matrix
5644d3841fdSKris Buschelman 
5654d3841fdSKris Buschelman    Notes:
5669af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
5674d3841fdSKris Buschelman    the user using MatDeatroy().
5684d3841fdSKris Buschelman 
569170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
570170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5714d3841fdSKris Buschelman 
5724d3841fdSKris Buschelman    Level: intermediate
5734d3841fdSKris Buschelman 
5749af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5756849ba73SBarry Smith */
576e240928fSHong Zhang #undef __FUNCT__
577e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
57855a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
57955a3bba9SHong Zhang {
580dfbe8321SBarry Smith   PetscErrorCode ierr;
581534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
582534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
583eb9c0419SKris Buschelman 
584eb9c0419SKris Buschelman   PetscFunctionBegin;
585eb9c0419SKris Buschelman 
5864482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
587c9780b6fSBarry Smith   PetscValidType(A,1);
588eb9c0419SKris Buschelman   MatPreallocated(A);
589eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
590eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
591eb9c0419SKris Buschelman 
5924482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
593c9780b6fSBarry Smith   PetscValidType(P,2);
594eb9c0419SKris Buschelman   MatPreallocated(P);
595eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
596eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
597eb9c0419SKris Buschelman 
5984482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
599c9780b6fSBarry Smith   PetscValidType(C,3);
600eb9c0419SKris Buschelman   MatPreallocated(C);
601eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
602eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
603eb9c0419SKris Buschelman 
604eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
605eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
606eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
607eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
608eb9c0419SKris Buschelman 
609534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
610534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
611534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
612534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
613534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
614534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
615534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
6164d3841fdSKris Buschelman 
617534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
618534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
619534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
620eb9c0419SKris Buschelman 
621eb9c0419SKris Buschelman   PetscFunctionReturn(0);
622eb9c0419SKris Buschelman }
623eb9c0419SKris Buschelman 
624eb9c0419SKris Buschelman #undef __FUNCT__
6259af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
626dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
627dfbe8321SBarry Smith {
628dfbe8321SBarry Smith   PetscErrorCode ierr;
629b1d57f15SBarry Smith   PetscInt       flops=0;
630d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
631d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
632d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
633b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
634b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
635b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
636b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
637d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
638eb9c0419SKris Buschelman 
639eb9c0419SKris Buschelman   PetscFunctionBegin;
640d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
641b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
642b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
643eb9c0419SKris Buschelman 
644b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
645d20bfe6fSHong Zhang   apjdense = apj + cn;
646d20bfe6fSHong Zhang 
647d20bfe6fSHong Zhang   /* Clear old values in C */
648d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
649d20bfe6fSHong Zhang 
650d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
651d20bfe6fSHong Zhang     /* Form sparse row of A*P */
652d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
653d20bfe6fSHong Zhang     apnzj = 0;
654d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
655d20bfe6fSHong Zhang       prow = *aj++;
656d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
657d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
658d20bfe6fSHong Zhang       paj  = pa + pi[prow];
659d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
660d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
661d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
662d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
663d20bfe6fSHong Zhang         }
664d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
665d20bfe6fSHong Zhang       }
666d20bfe6fSHong Zhang       flops += 2*pnzj;
667d20bfe6fSHong Zhang       aa++;
668d20bfe6fSHong Zhang     }
669d20bfe6fSHong Zhang 
670d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
671d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
672d20bfe6fSHong Zhang 
673d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
674d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
675d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
676d20bfe6fSHong Zhang       nextap = 0;
677d20bfe6fSHong Zhang       crow   = *pJ++;
678d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
679d20bfe6fSHong Zhang       caj    = ca + ci[crow];
680d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
681d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
682d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
683d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
684d20bfe6fSHong Zhang         }
685d20bfe6fSHong Zhang       }
686d20bfe6fSHong Zhang       flops += 2*apnzj;
687d20bfe6fSHong Zhang       pA++;
688d20bfe6fSHong Zhang     }
689d20bfe6fSHong Zhang 
690d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
691d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
692d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
693d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
694d20bfe6fSHong Zhang     }
695d20bfe6fSHong Zhang   }
696d20bfe6fSHong Zhang 
697d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
698d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
699d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
700d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
701d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
702d20bfe6fSHong Zhang 
703eb9c0419SKris Buschelman   PetscFunctionReturn(0);
704eb9c0419SKris Buschelman }
7050e36024fSHong Zhang 
706a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
707e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
708e240928fSHong Zhang #undef __FUNCT__
709e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
710e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
711e240928fSHong Zhang {
712e240928fSHong Zhang   PetscErrorCode ierr;
713e240928fSHong Zhang   PetscInt       flops=0;
714e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
715e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
716e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
717e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
718e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
719e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
720e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
721e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
722e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
723e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
724e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
725e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
726e240928fSHong Zhang 
727e240928fSHong Zhang   PetscFunctionBegin;
728e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
729a61c8c0fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
730e240928fSHong Zhang   }
731a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
732e240928fSHong Zhang 
733e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
734e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
735e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
736e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
737e240928fSHong Zhang   apjdense = apj + cn;
738e240928fSHong Zhang 
739e240928fSHong Zhang   /* Clear old values in C */
740e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
741e240928fSHong Zhang 
742e240928fSHong Zhang   for (i=0;i<am;i++) {
743e240928fSHong Zhang     /* Form i-th sparse row of A*P */
744e240928fSHong Zhang      apnzj = 0;
745e240928fSHong Zhang     /* diagonal portion of A */
746e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
747e240928fSHong Zhang     for (j=0;j<anzi;j++) {
748e240928fSHong Zhang       prow = *adj;
749e240928fSHong Zhang       adj++;
750e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
751e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
752e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
753e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
754e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
755e240928fSHong Zhang           apjdense[pjj[k]] = -1;
756e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
757e240928fSHong Zhang         }
758e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
759e240928fSHong Zhang       }
760e240928fSHong Zhang       flops += 2*pnzj;
761e240928fSHong Zhang       ada++;
762e240928fSHong Zhang     }
763e240928fSHong Zhang     /* off-diagonal portion of A */
764e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
765e240928fSHong Zhang     for (j=0;j<anzi;j++) {
766e240928fSHong Zhang       col = a->garray[*aoj];
767e240928fSHong Zhang       if (col < cstart){
768e240928fSHong Zhang         prow = *aoj;
769e240928fSHong Zhang       } else if (col >= cend){
770e240928fSHong Zhang         prow = *aoj;
771e240928fSHong Zhang       } else {
772e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
773e240928fSHong Zhang       }
774e240928fSHong Zhang       aoj++;
775e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
776e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
777e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
778e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
779e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
780e240928fSHong Zhang           apjdense[pjj[k]] = -1;
781e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
782e240928fSHong Zhang         }
783e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
784e240928fSHong Zhang       }
785e240928fSHong Zhang       flops += 2*pnzj;
786e240928fSHong Zhang       aoa++;
787e240928fSHong Zhang     }
788e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
789e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
790e240928fSHong Zhang 
791e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
792e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
793e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
794e240928fSHong Zhang       nextap = 0;
795e240928fSHong Zhang       crow   = *pJ++;
796e240928fSHong Zhang       cjj    = cj + ci[crow];
797e240928fSHong Zhang       caj    = ca + ci[crow];
798e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
799e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
800e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
801e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
802e240928fSHong Zhang         }
803e240928fSHong Zhang       }
804e240928fSHong Zhang       flops += 2*apnzj;
805e240928fSHong Zhang       pA++;
806e240928fSHong Zhang     }
807e240928fSHong Zhang 
808e240928fSHong Zhang     /* Zero the current row info for A*P */
809e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
810e240928fSHong Zhang       apa[apj[j]]      = 0.;
811e240928fSHong Zhang       apjdense[apj[j]] = 0;
812e240928fSHong Zhang     }
813e240928fSHong Zhang   }
814e240928fSHong Zhang 
815e240928fSHong Zhang   /* Assemble the final matrix and clean up */
816e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
817e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
819e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
820a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
821e240928fSHong Zhang   PetscFunctionReturn(0);
822e240928fSHong Zhang }
823e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
824e240928fSHong Zhang #undef __FUNCT__
825e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
826e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
827e240928fSHong Zhang {
828e240928fSHong Zhang   PetscErrorCode ierr;
829e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
830e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
831e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
832e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
833e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
834e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
835e240928fSHong Zhang   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
836e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
837e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
838e240928fSHong Zhang   MatScalar      *ca;
839e240928fSHong Zhang   PetscBT        lnkbt;
840e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
841e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
842e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
843e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
844e240928fSHong Zhang 
845e240928fSHong Zhang   PetscFunctionBegin;
846e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
847e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
848e240928fSHong Zhang   }
849e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
850e240928fSHong Zhang 
851e240928fSHong Zhang   prend = prstart + pm;
852e240928fSHong Zhang 
853e240928fSHong Zhang   /* get ij structure of P_loc^T */
854e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
855e240928fSHong Zhang   ptJ=ptj;
856e240928fSHong Zhang 
857e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
858e240928fSHong Zhang   /* free space for accumulating nonzero column info */
859e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
860e240928fSHong Zhang   ci[0] = 0;
861e240928fSHong Zhang 
862e240928fSHong Zhang   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
863e240928fSHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
864e240928fSHong Zhang   ptasparserow_loc = ptadenserow_loc + an;
865e240928fSHong Zhang   ptadenserow_oth  = ptasparserow_loc + an;
866e240928fSHong Zhang   ptasparserow_oth = ptadenserow_oth + an;
867e240928fSHong Zhang 
868e240928fSHong Zhang   /* create and initialize a linked list */
869e240928fSHong Zhang   nlnk = pN+1;
870e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
871e240928fSHong Zhang 
872e240928fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
873e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
874e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
875e240928fSHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
876e240928fSHong Zhang   current_space = free_space;
877e240928fSHong Zhang 
878e240928fSHong Zhang   /* determine symbolic info for each row of C: */
879e240928fSHong Zhang   for (i=0;i<pN;i++) {
880e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
881e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
882e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
883e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
884e240928fSHong Zhang       arow = *ptJ++;
885e240928fSHong Zhang       /* diagonal portion of A */
886e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
887e240928fSHong Zhang       ajj  = adj + adi[arow];
888e240928fSHong Zhang       for (k=0;k<anzj;k++) {
889e240928fSHong Zhang         col = ajj[k]+prstart;
890e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
891e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
892e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
893e240928fSHong Zhang         }
894e240928fSHong Zhang       }
895e240928fSHong Zhang       /* off-diagonal portion of A */
896e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
897e240928fSHong Zhang       ajj  = aoj + aoi[arow];
898e240928fSHong Zhang       for (k=0;k<anzj;k++) {
899e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
900e240928fSHong Zhang         if (col < cstart){
901e240928fSHong Zhang           col = ajj[k];
902e240928fSHong Zhang         } else if (col >= cend){
903e240928fSHong Zhang           col = ajj[k] + pm;
904e240928fSHong Zhang         } else {
905e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
906e240928fSHong Zhang         }
907e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
908e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
909e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
910e240928fSHong Zhang         }
911e240928fSHong Zhang       }
912e240928fSHong Zhang     }
913e240928fSHong Zhang 
914e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
915e240928fSHong Zhang     cnzi   = 0;
916e240928fSHong Zhang     /* rows of P_loc */
917e240928fSHong Zhang     ptaj = ptasparserow_loc;
918e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
919e240928fSHong Zhang       prow = *ptaj++;
920e240928fSHong Zhang       prow -= prstart; /* rm */
921e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
922e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
923e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
924e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
925e240928fSHong Zhang       cnzi += nlnk;
926e240928fSHong Zhang     }
927e240928fSHong Zhang     /* rows of P_oth */
928e240928fSHong Zhang     ptaj = ptasparserow_oth;
929e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
930e240928fSHong Zhang       prow = *ptaj++;
931e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
932e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
933e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
934e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
935e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
936e240928fSHong Zhang       cnzi += nlnk;
937e240928fSHong Zhang     }
938e240928fSHong Zhang 
939e240928fSHong Zhang     /* If free space is not available, make more free space */
940e240928fSHong Zhang     /* Double the amount of total space in the list */
941e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
942e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
943e240928fSHong Zhang     }
944e240928fSHong Zhang 
945e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
946e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
947e240928fSHong Zhang     current_space->array           += cnzi;
948e240928fSHong Zhang     current_space->local_used      += cnzi;
949e240928fSHong Zhang     current_space->local_remaining -= cnzi;
950e240928fSHong Zhang 
951e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
952e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
953e240928fSHong Zhang     }
954e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
955e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
956e240928fSHong Zhang     }
957e240928fSHong Zhang 
958e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
959e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
960e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
961e240928fSHong Zhang   }
962e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
963e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
964e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
965e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
966e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
967e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
968e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
969e240928fSHong Zhang 
970e240928fSHong Zhang   /* Allocate space for ca */
971e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
972e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
973e240928fSHong Zhang 
974e240928fSHong Zhang   /* put together the new matrix */
975e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
976e240928fSHong Zhang 
977e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
978e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
979e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
980e240928fSHong Zhang   c->freedata = PETSC_TRUE;
981e240928fSHong Zhang   c->nonew    = 0;
982e240928fSHong Zhang 
983e240928fSHong Zhang   /* Clean up. */
984e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
985a61c8c0fSHong Zhang 
986e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
987e240928fSHong Zhang   PetscFunctionReturn(0);
988e240928fSHong Zhang }
989