xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 32fba14f877989dfa2a5e2c32c1621ab30bd1582)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76e240928fSHong Zhang 
77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78a61c8c0fSHong Zhang #undef __FUNCT__
79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81a61c8c0fSHong Zhang {
82a61c8c0fSHong Zhang   PetscErrorCode       ierr;
83*32fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
84a61c8c0fSHong Zhang   PetscObjectContainer container;
85a61c8c0fSHong Zhang 
86a61c8c0fSHong Zhang   PetscFunctionBegin;
87a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88a61c8c0fSHong Zhang   if (container) {
89a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90*32fba14fSHong Zhang     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91*32fba14fSHong Zhang     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92a61c8c0fSHong Zhang 
93a61c8c0fSHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
94*32fba14fSHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
95a61c8c0fSHong Zhang   }
96a61c8c0fSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
97a61c8c0fSHong Zhang 
98a61c8c0fSHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
99a61c8c0fSHong Zhang   PetscFunctionReturn(0);
100a61c8c0fSHong Zhang }
101a61c8c0fSHong Zhang 
102eb9c0419SKris Buschelman #undef __FUNCT__
103ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
104ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
105ff134f7aSHong Zhang {
106ff134f7aSHong Zhang   PetscErrorCode       ierr;
107*32fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
108a61c8c0fSHong Zhang   PetscObjectContainer container;
109b90dcfe3SHong Zhang 
110b90dcfe3SHong Zhang   PetscFunctionBegin;
111b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1124c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
113*32fba14fSHong Zhang     ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
114*32fba14fSHong Zhang 
115e240928fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
116*32fba14fSHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
117a61c8c0fSHong Zhang 
118e240928fSHong Zhang     /* get P_loc by taking all local rows of P */
119*32fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
120e240928fSHong Zhang 
121a61c8c0fSHong Zhang     /* attach the supporting struct to P for reuse */
122a61c8c0fSHong Zhang     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
123a61c8c0fSHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
124a61c8c0fSHong Zhang     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
125a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
126e240928fSHong Zhang 
127*32fba14fSHong Zhang     /* now, compute symbolic local P^T*A*P */
128a61c8c0fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
129a61c8c0fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
130a61c8c0fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
131a61c8c0fSHong Zhang     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
132a61c8c0fSHong Zhang     if (container) {
133a61c8c0fSHong Zhang       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
134a61c8c0fSHong Zhang     } else {
135a61c8c0fSHong Zhang       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
136a61c8c0fSHong Zhang     }
137a61c8c0fSHong Zhang 
138a61c8c0fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
139a61c8c0fSHong Zhang     /* improve! */
140*32fba14fSHong Zhang     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
141*32fba14fSHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
142a61c8c0fSHong Zhang 
143a61c8c0fSHong Zhang     /* get P_loc by taking all local rows of P */
144*32fba14fSHong Zhang     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr); /* improve! */
145*32fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
146*32fba14fSHong Zhang 
147a61c8c0fSHong Zhang   } else {
148a61c8c0fSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
149a61c8c0fSHong Zhang   }
150*32fba14fSHong Zhang   /* now, compute numeric local P^T*A*P */
151a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
152a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
153a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
154a61c8c0fSHong Zhang 
155a61c8c0fSHong Zhang   PetscFunctionReturn(0);
156a61c8c0fSHong Zhang }
157a61c8c0fSHong Zhang 
158a61c8c0fSHong Zhang #undef __FUNCT__
159a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
160a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
161a61c8c0fSHong Zhang {
162a61c8c0fSHong Zhang   PetscErrorCode       ierr;
163a61c8c0fSHong Zhang   Mat                  C_seq;
164*32fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
165a61c8c0fSHong Zhang   PetscObjectContainer container;
166a61c8c0fSHong Zhang 
167a61c8c0fSHong Zhang   PetscFunctionBegin;
168a61c8c0fSHong Zhang 
169a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
170a61c8c0fSHong Zhang   if (container) {
171a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
172a61c8c0fSHong Zhang   } else {
173a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
174a61c8c0fSHong Zhang   }
17525616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
176*32fba14fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
177a61c8c0fSHong Zhang 
178b90dcfe3SHong Zhang   /* add C_seq into mpi C */
179a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
180b90dcfe3SHong Zhang 
181ff134f7aSHong Zhang   PetscFunctionReturn(0);
182ff134f7aSHong Zhang }
183ff134f7aSHong Zhang 
184ff134f7aSHong Zhang #undef __FUNCT__
185e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
186b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
187ff134f7aSHong Zhang {
188b90dcfe3SHong Zhang   PetscErrorCode       ierr;
189671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
190*32fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
191a61c8c0fSHong Zhang   PetscObjectContainer cont_merge,cont_ptap;
192ff134f7aSHong Zhang 
193ff134f7aSHong Zhang   PetscFunctionBegin;
194a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
195a61c8c0fSHong Zhang   if (cont_merge) {
196a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
1977f79fd58SMatthew Knepley   } else {
198a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
199671beff6SHong Zhang   }
200a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
201a61c8c0fSHong Zhang   if (cont_ptap) {
202a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
203a61c8c0fSHong Zhang   } else {
204a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
205a61c8c0fSHong Zhang   }
206*32fba14fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
207a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
208ff134f7aSHong Zhang   PetscFunctionReturn(0);
209ff134f7aSHong Zhang }
210ff134f7aSHong Zhang 
211ff134f7aSHong Zhang #undef __FUNCT__
2129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
213dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
2149af31e4aSHong Zhang {
215dfbe8321SBarry Smith   PetscErrorCode ierr;
216b1d57f15SBarry Smith 
2179af31e4aSHong Zhang   PetscFunctionBegin;
2189af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
219d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2209af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
221d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2229af31e4aSHong Zhang   }
223d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2249af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
225d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2269af31e4aSHong Zhang   PetscFunctionReturn(0);
2279af31e4aSHong Zhang }
2289af31e4aSHong Zhang 
2296849ba73SBarry Smith /*
2309af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
2314d3841fdSKris Buschelman 
2324d3841fdSKris Buschelman    Collective on Mat
2334d3841fdSKris Buschelman 
2344d3841fdSKris Buschelman    Input Parameters:
2354d3841fdSKris Buschelman +  A - the matrix
2364d3841fdSKris Buschelman -  P - the projection matrix
2374d3841fdSKris Buschelman 
2384d3841fdSKris Buschelman    Output Parameters:
2394d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
2404d3841fdSKris Buschelman 
2414d3841fdSKris Buschelman    Notes:
2424d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
2434d3841fdSKris Buschelman 
2444d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
2454d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
2469af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
2474d3841fdSKris Buschelman 
2484d3841fdSKris Buschelman    Level: intermediate
2494d3841fdSKris Buschelman 
2509af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
2516849ba73SBarry Smith */
252e240928fSHong Zhang #undef __FUNCT__
253e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
25455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
25555a3bba9SHong Zhang {
256dfbe8321SBarry Smith   PetscErrorCode ierr;
257534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
258534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
259eb9c0419SKris Buschelman 
260eb9c0419SKris Buschelman   PetscFunctionBegin;
261eb9c0419SKris Buschelman 
2624482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
263c9780b6fSBarry Smith   PetscValidType(A,1);
264eb9c0419SKris Buschelman   MatPreallocated(A);
265eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
266eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
267eb9c0419SKris Buschelman 
2684482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
269c9780b6fSBarry Smith   PetscValidType(P,2);
270eb9c0419SKris Buschelman   MatPreallocated(P);
271eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
272eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
273eb9c0419SKris Buschelman 
2744482741eSBarry Smith   PetscValidPointer(C,3);
2754482741eSBarry Smith 
276eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
277eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
278eb9c0419SKris Buschelman 
279534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
280534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
281534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
282534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
283534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
284534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
285534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2864d3841fdSKris Buschelman 
287534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
288534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
289534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
290eb9c0419SKris Buschelman 
291eb9c0419SKris Buschelman   PetscFunctionReturn(0);
292eb9c0419SKris Buschelman }
293eb9c0419SKris Buschelman 
294f747e1aeSHong Zhang typedef struct {
295f747e1aeSHong Zhang   Mat    symAP;
296f747e1aeSHong Zhang } Mat_PtAPstruct;
297f747e1aeSHong Zhang 
29878a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
29978a80504SBarry Smith 
300f747e1aeSHong Zhang #undef __FUNCT__
301f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
302f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
303f747e1aeSHong Zhang {
304f4a850bbSBarry Smith   PetscErrorCode    ierr;
305f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
306f747e1aeSHong Zhang 
307f747e1aeSHong Zhang   PetscFunctionBegin;
308f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
309f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
31078a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
311f747e1aeSHong Zhang   PetscFunctionReturn(0);
312f747e1aeSHong Zhang }
313f747e1aeSHong Zhang 
314eb9c0419SKris Buschelman #undef __FUNCT__
3159af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
31655a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
31755a3bba9SHong Zhang {
318dfbe8321SBarry Smith   PetscErrorCode ierr;
319d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
320d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
32155a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
322b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
32355a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
324b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
325d20bfe6fSHong Zhang   MatScalar      *ca;
32655a3bba9SHong Zhang   PetscBT        lnkbt;
327eb9c0419SKris Buschelman 
328eb9c0419SKris Buschelman   PetscFunctionBegin;
329d20bfe6fSHong Zhang   /* Get ij structure of P^T */
330eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
331d20bfe6fSHong Zhang   ptJ=ptj;
332eb9c0419SKris Buschelman 
333d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
334d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
33555a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
336d20bfe6fSHong Zhang   ci[0] = 0;
337eb9c0419SKris Buschelman 
33855a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
33955a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
340d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
34155a3bba9SHong Zhang 
34255a3bba9SHong Zhang   /* create and initialize a linked list */
34355a3bba9SHong Zhang   nlnk = pn+1;
34455a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
345eb9c0419SKris Buschelman 
346d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
347d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
348d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
349d20bfe6fSHong Zhang   current_space = free_space;
350d20bfe6fSHong Zhang 
351d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
352d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
353d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
354d20bfe6fSHong Zhang     ptanzi = 0;
355d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
356d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
357d20bfe6fSHong Zhang       arow = *ptJ++;
358d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
359d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
360d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
361d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
362d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
363d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
364d20bfe6fSHong Zhang         }
365d20bfe6fSHong Zhang       }
366d20bfe6fSHong Zhang     }
367d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
368d20bfe6fSHong Zhang     ptaj = ptasparserow;
369d20bfe6fSHong Zhang     cnzi   = 0;
370d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
371d20bfe6fSHong Zhang       prow = *ptaj++;
372d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
373d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
37455a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
37555a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
37655a3bba9SHong Zhang       cnzi += nlnk;
377d20bfe6fSHong Zhang     }
378d20bfe6fSHong Zhang 
379d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
380d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
381d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
382d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
383d20bfe6fSHong Zhang     }
384d20bfe6fSHong Zhang 
385d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
38655a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
387d20bfe6fSHong Zhang     current_space->array           += cnzi;
388d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
389d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
390d20bfe6fSHong Zhang 
391d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
392d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
393d20bfe6fSHong Zhang     }
394d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
395d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
396d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
397d20bfe6fSHong Zhang   }
398d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
399d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
400d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
40155a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
402d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
403d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
40455a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
405d20bfe6fSHong Zhang 
406d20bfe6fSHong Zhang   /* Allocate space for ca */
407d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
408d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
409d20bfe6fSHong Zhang 
410d20bfe6fSHong Zhang   /* put together the new matrix */
411d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
412d20bfe6fSHong Zhang 
413d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
414d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
415d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
416d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
417d20bfe6fSHong Zhang   c->nonew    = 0;
418d20bfe6fSHong Zhang 
419d20bfe6fSHong Zhang   /* Clean up. */
420d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
421eb9c0419SKris Buschelman 
422eb9c0419SKris Buschelman   PetscFunctionReturn(0);
423eb9c0419SKris Buschelman }
424eb9c0419SKris Buschelman 
4253985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
4263985e5eaSKris Buschelman EXTERN_C_BEGIN
4273985e5eaSKris Buschelman #undef __FUNCT__
4289af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
42955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
43055a3bba9SHong Zhang {
4315c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
432dfbe8321SBarry Smith   PetscErrorCode ierr;
4333985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4343985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
4353985e5eaSKris Buschelman   Mat            P=pp->AIJ;
4363985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
437b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
438b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
439b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
440b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
4413985e5eaSKris Buschelman   MatScalar      *ca;
4423985e5eaSKris Buschelman 
4433985e5eaSKris Buschelman   PetscFunctionBegin;
4443985e5eaSKris Buschelman   /* Start timer */
4459af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4463985e5eaSKris Buschelman 
4473985e5eaSKris Buschelman   /* Get ij structure of P^T */
4483985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4493985e5eaSKris Buschelman 
4503985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
4513985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
452b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4533985e5eaSKris Buschelman   ci[0] = 0;
4543985e5eaSKris Buschelman 
455b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
456b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4573985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
4583985e5eaSKris Buschelman   denserow     = ptasparserow + an;
4593985e5eaSKris Buschelman   sparserow    = denserow     + pn;
4603985e5eaSKris Buschelman 
4613985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
4623985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
4633985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
4643985e5eaSKris Buschelman   current_space = free_space;
4653985e5eaSKris Buschelman 
4663985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
4673985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
4683985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
4693985e5eaSKris Buschelman     ptanzi = 0;
4703985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4713985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4723985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4733985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4743985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4753985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4763985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4773985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4783985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4793985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4803985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4813985e5eaSKris Buschelman           }
4823985e5eaSKris Buschelman         }
4833985e5eaSKris Buschelman       }
4843985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4853985e5eaSKris Buschelman       ptaj = ptasparserow;
4863985e5eaSKris Buschelman       cnzi   = 0;
4873985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
488fe05a634SKris Buschelman         pdof = *ptaj%dof;
4893985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4903985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4913985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4923985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
493fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
494fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
495fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4963985e5eaSKris Buschelman           }
4973985e5eaSKris Buschelman         }
4983985e5eaSKris Buschelman       }
4993985e5eaSKris Buschelman 
5003985e5eaSKris Buschelman       /* sort sparserow */
5013985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
5023985e5eaSKris Buschelman 
5033985e5eaSKris Buschelman       /* If free space is not available, make more free space */
5043985e5eaSKris Buschelman       /* Double the amount of total space in the list */
5053985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
5063985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
5073985e5eaSKris Buschelman       }
5083985e5eaSKris Buschelman 
5093985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
510b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
5113985e5eaSKris Buschelman       current_space->array           += cnzi;
5123985e5eaSKris Buschelman       current_space->local_used      += cnzi;
5133985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
5143985e5eaSKris Buschelman 
5153985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
5163985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
5173985e5eaSKris Buschelman       }
5183985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
5193985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
5203985e5eaSKris Buschelman       }
5213985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
5223985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
5233985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
5243985e5eaSKris Buschelman     }
5253985e5eaSKris Buschelman   }
5263985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
5273985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
5283985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
529b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5303985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
5313985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
5323985e5eaSKris Buschelman 
5333985e5eaSKris Buschelman   /* Allocate space for ca */
5343985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
5353985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
5363985e5eaSKris Buschelman 
5373985e5eaSKris Buschelman   /* put together the new matrix */
5383985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
5393985e5eaSKris Buschelman 
5403985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5413985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
5423985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
5433985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
5443985e5eaSKris Buschelman   c->nonew    = 0;
5453985e5eaSKris Buschelman 
5463985e5eaSKris Buschelman   /* Clean up. */
5473985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
5483985e5eaSKris Buschelman 
5499af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
5503985e5eaSKris Buschelman   PetscFunctionReturn(0);
5513985e5eaSKris Buschelman }
5523985e5eaSKris Buschelman EXTERN_C_END
5533985e5eaSKris Buschelman 
5546849ba73SBarry Smith /*
5559af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
5564d3841fdSKris Buschelman 
5574d3841fdSKris Buschelman    Collective on Mat
5584d3841fdSKris Buschelman 
5594d3841fdSKris Buschelman    Input Parameters:
5604d3841fdSKris Buschelman +  A - the matrix
5614d3841fdSKris Buschelman -  P - the projection matrix
5624d3841fdSKris Buschelman 
5634d3841fdSKris Buschelman    Output Parameters:
5644d3841fdSKris Buschelman .  C - the product matrix
5654d3841fdSKris Buschelman 
5664d3841fdSKris Buschelman    Notes:
5679af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
5684d3841fdSKris Buschelman    the user using MatDeatroy().
5694d3841fdSKris Buschelman 
570170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
571170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5724d3841fdSKris Buschelman 
5734d3841fdSKris Buschelman    Level: intermediate
5744d3841fdSKris Buschelman 
5759af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5766849ba73SBarry Smith */
577e240928fSHong Zhang #undef __FUNCT__
578e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
57955a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
58055a3bba9SHong Zhang {
581dfbe8321SBarry Smith   PetscErrorCode ierr;
582534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
583534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
584eb9c0419SKris Buschelman 
585eb9c0419SKris Buschelman   PetscFunctionBegin;
586eb9c0419SKris Buschelman 
5874482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
588c9780b6fSBarry Smith   PetscValidType(A,1);
589eb9c0419SKris Buschelman   MatPreallocated(A);
590eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
591eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
592eb9c0419SKris Buschelman 
5934482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
594c9780b6fSBarry Smith   PetscValidType(P,2);
595eb9c0419SKris Buschelman   MatPreallocated(P);
596eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
597eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
598eb9c0419SKris Buschelman 
5994482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
600c9780b6fSBarry Smith   PetscValidType(C,3);
601eb9c0419SKris Buschelman   MatPreallocated(C);
602eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
603eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
604eb9c0419SKris Buschelman 
605eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
606eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
607eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
608eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
609eb9c0419SKris Buschelman 
610534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
611534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
612534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
613534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
614534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
615534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
616534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
6174d3841fdSKris Buschelman 
618534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
619534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
620534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
621eb9c0419SKris Buschelman 
622eb9c0419SKris Buschelman   PetscFunctionReturn(0);
623eb9c0419SKris Buschelman }
624eb9c0419SKris Buschelman 
625eb9c0419SKris Buschelman #undef __FUNCT__
6269af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
627dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
628dfbe8321SBarry Smith {
629dfbe8321SBarry Smith   PetscErrorCode ierr;
630b1d57f15SBarry Smith   PetscInt       flops=0;
631d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
632d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
633d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
634b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
635b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
636b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
637b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
638d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
639eb9c0419SKris Buschelman 
640eb9c0419SKris Buschelman   PetscFunctionBegin;
641d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
642b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
643b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
644eb9c0419SKris Buschelman 
645b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
646d20bfe6fSHong Zhang   apjdense = apj + cn;
647d20bfe6fSHong Zhang 
648d20bfe6fSHong Zhang   /* Clear old values in C */
649d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
650d20bfe6fSHong Zhang 
651d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
652d20bfe6fSHong Zhang     /* Form sparse row of A*P */
653d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
654d20bfe6fSHong Zhang     apnzj = 0;
655d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
656d20bfe6fSHong Zhang       prow = *aj++;
657d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
658d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
659d20bfe6fSHong Zhang       paj  = pa + pi[prow];
660d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
661d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
662d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
663d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
664d20bfe6fSHong Zhang         }
665d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
666d20bfe6fSHong Zhang       }
667d20bfe6fSHong Zhang       flops += 2*pnzj;
668d20bfe6fSHong Zhang       aa++;
669d20bfe6fSHong Zhang     }
670d20bfe6fSHong Zhang 
671d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
672d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
673d20bfe6fSHong Zhang 
674d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
675d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
676d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
677d20bfe6fSHong Zhang       nextap = 0;
678d20bfe6fSHong Zhang       crow   = *pJ++;
679d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
680d20bfe6fSHong Zhang       caj    = ca + ci[crow];
681d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
682d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
683d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
684d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
685d20bfe6fSHong Zhang         }
686d20bfe6fSHong Zhang       }
687d20bfe6fSHong Zhang       flops += 2*apnzj;
688d20bfe6fSHong Zhang       pA++;
689d20bfe6fSHong Zhang     }
690d20bfe6fSHong Zhang 
691d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
692d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
693d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
694d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
695d20bfe6fSHong Zhang     }
696d20bfe6fSHong Zhang   }
697d20bfe6fSHong Zhang 
698d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
699d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
700d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
701d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
702d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
703d20bfe6fSHong Zhang 
704eb9c0419SKris Buschelman   PetscFunctionReturn(0);
705eb9c0419SKris Buschelman }
7060e36024fSHong Zhang 
707a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
708e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
709e240928fSHong Zhang #undef __FUNCT__
710e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
711e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
712e240928fSHong Zhang {
713e240928fSHong Zhang   PetscErrorCode ierr;
714e240928fSHong Zhang   PetscInt       flops=0;
715e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
716e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
717e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
718e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
719e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
720e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
721e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
722e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
723e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
724e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
725e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
726e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
727e240928fSHong Zhang 
728e240928fSHong Zhang   PetscFunctionBegin;
729e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
730a61c8c0fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
731e240928fSHong Zhang   }
732a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
733e240928fSHong Zhang 
734e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
735e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
736e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
737e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
738e240928fSHong Zhang   apjdense = apj + cn;
739e240928fSHong Zhang 
740e240928fSHong Zhang   /* Clear old values in C */
741e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
742e240928fSHong Zhang 
743e240928fSHong Zhang   for (i=0;i<am;i++) {
744e240928fSHong Zhang     /* Form i-th sparse row of A*P */
745e240928fSHong Zhang      apnzj = 0;
746e240928fSHong Zhang     /* diagonal portion of A */
747e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
748e240928fSHong Zhang     for (j=0;j<anzi;j++) {
749e240928fSHong Zhang       prow = *adj;
750e240928fSHong Zhang       adj++;
751e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
752e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
753e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
754e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
755e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
756e240928fSHong Zhang           apjdense[pjj[k]] = -1;
757e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
758e240928fSHong Zhang         }
759e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
760e240928fSHong Zhang       }
761e240928fSHong Zhang       flops += 2*pnzj;
762e240928fSHong Zhang       ada++;
763e240928fSHong Zhang     }
764e240928fSHong Zhang     /* off-diagonal portion of A */
765e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
766e240928fSHong Zhang     for (j=0;j<anzi;j++) {
767e240928fSHong Zhang       col = a->garray[*aoj];
768e240928fSHong Zhang       if (col < cstart){
769e240928fSHong Zhang         prow = *aoj;
770e240928fSHong Zhang       } else if (col >= cend){
771e240928fSHong Zhang         prow = *aoj;
772e240928fSHong Zhang       } else {
773e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
774e240928fSHong Zhang       }
775e240928fSHong Zhang       aoj++;
776e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
777e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
778e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
779e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
780e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
781e240928fSHong Zhang           apjdense[pjj[k]] = -1;
782e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
783e240928fSHong Zhang         }
784e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
785e240928fSHong Zhang       }
786e240928fSHong Zhang       flops += 2*pnzj;
787e240928fSHong Zhang       aoa++;
788e240928fSHong Zhang     }
789e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
790e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
791e240928fSHong Zhang 
792e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
793e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
794e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
795e240928fSHong Zhang       nextap = 0;
796e240928fSHong Zhang       crow   = *pJ++;
797e240928fSHong Zhang       cjj    = cj + ci[crow];
798e240928fSHong Zhang       caj    = ca + ci[crow];
799e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
800e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
801e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
802e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
803e240928fSHong Zhang         }
804e240928fSHong Zhang       }
805e240928fSHong Zhang       flops += 2*apnzj;
806e240928fSHong Zhang       pA++;
807e240928fSHong Zhang     }
808e240928fSHong Zhang 
809e240928fSHong Zhang     /* Zero the current row info for A*P */
810e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
811e240928fSHong Zhang       apa[apj[j]]      = 0.;
812e240928fSHong Zhang       apjdense[apj[j]] = 0;
813e240928fSHong Zhang     }
814e240928fSHong Zhang   }
815e240928fSHong Zhang 
816e240928fSHong Zhang   /* Assemble the final matrix and clean up */
817e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
820e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
821a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
822e240928fSHong Zhang   PetscFunctionReturn(0);
823e240928fSHong Zhang }
824e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
825e240928fSHong Zhang #undef __FUNCT__
826e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
827e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
828e240928fSHong Zhang {
829e240928fSHong Zhang   PetscErrorCode ierr;
830e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
831e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
832e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
833e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
834e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
835e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
836e240928fSHong Zhang   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
837e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
838e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
839e240928fSHong Zhang   MatScalar      *ca;
840e240928fSHong Zhang   PetscBT        lnkbt;
841e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
842e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
843e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
844e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
845e240928fSHong Zhang 
846e240928fSHong Zhang   PetscFunctionBegin;
847e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
848e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
849e240928fSHong Zhang   }
850e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
851e240928fSHong Zhang 
852e240928fSHong Zhang   prend = prstart + pm;
853e240928fSHong Zhang 
854e240928fSHong Zhang   /* get ij structure of P_loc^T */
855e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
856e240928fSHong Zhang   ptJ=ptj;
857e240928fSHong Zhang 
858e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
859e240928fSHong Zhang   /* free space for accumulating nonzero column info */
860e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
861e240928fSHong Zhang   ci[0] = 0;
862e240928fSHong Zhang 
863e240928fSHong Zhang   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
864e240928fSHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
865e240928fSHong Zhang   ptasparserow_loc = ptadenserow_loc + an;
866e240928fSHong Zhang   ptadenserow_oth  = ptasparserow_loc + an;
867e240928fSHong Zhang   ptasparserow_oth = ptadenserow_oth + an;
868e240928fSHong Zhang 
869e240928fSHong Zhang   /* create and initialize a linked list */
870e240928fSHong Zhang   nlnk = pN+1;
871e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
872e240928fSHong Zhang 
873e240928fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
874e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
875e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
876e240928fSHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
877e240928fSHong Zhang   current_space = free_space;
878e240928fSHong Zhang 
879e240928fSHong Zhang   /* determine symbolic info for each row of C: */
880e240928fSHong Zhang   for (i=0;i<pN;i++) {
881e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
882e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
883e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
884e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
885e240928fSHong Zhang       arow = *ptJ++;
886e240928fSHong Zhang       /* diagonal portion of A */
887e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
888e240928fSHong Zhang       ajj  = adj + adi[arow];
889e240928fSHong Zhang       for (k=0;k<anzj;k++) {
890e240928fSHong Zhang         col = ajj[k]+prstart;
891e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
892e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
893e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
894e240928fSHong Zhang         }
895e240928fSHong Zhang       }
896e240928fSHong Zhang       /* off-diagonal portion of A */
897e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
898e240928fSHong Zhang       ajj  = aoj + aoi[arow];
899e240928fSHong Zhang       for (k=0;k<anzj;k++) {
900e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
901e240928fSHong Zhang         if (col < cstart){
902e240928fSHong Zhang           col = ajj[k];
903e240928fSHong Zhang         } else if (col >= cend){
904e240928fSHong Zhang           col = ajj[k] + pm;
905e240928fSHong Zhang         } else {
906e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
907e240928fSHong Zhang         }
908e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
909e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
910e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
911e240928fSHong Zhang         }
912e240928fSHong Zhang       }
913e240928fSHong Zhang     }
914e240928fSHong Zhang 
915e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
916e240928fSHong Zhang     cnzi   = 0;
917e240928fSHong Zhang     /* rows of P_loc */
918e240928fSHong Zhang     ptaj = ptasparserow_loc;
919e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
920e240928fSHong Zhang       prow = *ptaj++;
921e240928fSHong Zhang       prow -= prstart; /* rm */
922e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
923e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
924e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
925e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
926e240928fSHong Zhang       cnzi += nlnk;
927e240928fSHong Zhang     }
928e240928fSHong Zhang     /* rows of P_oth */
929e240928fSHong Zhang     ptaj = ptasparserow_oth;
930e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
931e240928fSHong Zhang       prow = *ptaj++;
932e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
933e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
934e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
935e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
936e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
937e240928fSHong Zhang       cnzi += nlnk;
938e240928fSHong Zhang     }
939e240928fSHong Zhang 
940e240928fSHong Zhang     /* If free space is not available, make more free space */
941e240928fSHong Zhang     /* Double the amount of total space in the list */
942e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
943e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
944e240928fSHong Zhang     }
945e240928fSHong Zhang 
946e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
947e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
948e240928fSHong Zhang     current_space->array           += cnzi;
949e240928fSHong Zhang     current_space->local_used      += cnzi;
950e240928fSHong Zhang     current_space->local_remaining -= cnzi;
951e240928fSHong Zhang 
952e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
953e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
954e240928fSHong Zhang     }
955e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
956e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
957e240928fSHong Zhang     }
958e240928fSHong Zhang 
959e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
960e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
961e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
962e240928fSHong Zhang   }
963e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
964e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
965e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
966e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
967e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
968e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
969e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
970e240928fSHong Zhang 
971e240928fSHong Zhang   /* Allocate space for ca */
972e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
973e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
974e240928fSHong Zhang 
975e240928fSHong Zhang   /* put together the new matrix */
976e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
977e240928fSHong Zhang 
978e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
979e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
980e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
981e240928fSHong Zhang   c->freedata = PETSC_TRUE;
982e240928fSHong Zhang   c->nonew    = 0;
983e240928fSHong Zhang 
984e240928fSHong Zhang   /* Clean up. */
985e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
986a61c8c0fSHong Zhang 
987e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
988e240928fSHong Zhang   PetscFunctionReturn(0);
989e240928fSHong Zhang }
990