xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision a61c8c0fcc59ec3fc7e16759f035429beb276455)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76e240928fSHong Zhang 
77*a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78*a61c8c0fSHong Zhang #undef __FUNCT__
79*a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80*a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81*a61c8c0fSHong Zhang {
82*a61c8c0fSHong Zhang   PetscErrorCode       ierr;
83*a61c8c0fSHong Zhang   Mat_PtAPMPI          *ptap;
84*a61c8c0fSHong Zhang   PetscObjectContainer container;
85*a61c8c0fSHong Zhang 
86*a61c8c0fSHong Zhang   PetscFunctionBegin;
87*a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88*a61c8c0fSHong Zhang   if (container) {
89*a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90*a61c8c0fSHong Zhang 
91*a61c8c0fSHong Zhang     ierr = MatDestroyMatrices(1,&ptap->ploc);CHKERRQ(ierr);
92*a61c8c0fSHong Zhang     ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr);
93*a61c8c0fSHong Zhang 
94*a61c8c0fSHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
95*a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatPtApMPI",0);CHKERRQ(ierr);
96*a61c8c0fSHong Zhang   }
97*a61c8c0fSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
98*a61c8c0fSHong Zhang 
99*a61c8c0fSHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
100*a61c8c0fSHong Zhang   PetscFunctionReturn(0);
101*a61c8c0fSHong Zhang }
102*a61c8c0fSHong Zhang 
103eb9c0419SKris Buschelman #undef __FUNCT__
104ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
105ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
106ff134f7aSHong Zhang {
107ff134f7aSHong Zhang   PetscErrorCode       ierr;
108*a61c8c0fSHong Zhang   Mat                  P_loc,P_oth,*ploc;
109*a61c8c0fSHong Zhang   PetscInt             prstart,low;
110*a61c8c0fSHong Zhang   IS                   isrow,iscol;
111*a61c8c0fSHong Zhang   Mat_PtAPMPI          *ptap;
112*a61c8c0fSHong Zhang   PetscObjectContainer container;
113b90dcfe3SHong Zhang 
114b90dcfe3SHong Zhang   PetscFunctionBegin;
115b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1164c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
117e240928fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
118429d309bSHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr);
119*a61c8c0fSHong Zhang 
120e240928fSHong Zhang     /* get P_loc by taking all local rows of P */
121e240928fSHong Zhang     ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
122e240928fSHong Zhang     ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
123e240928fSHong Zhang     ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
124e240928fSHong Zhang     ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr);
125e240928fSHong Zhang     P_loc = ploc[0];
126e240928fSHong Zhang     ierr = ISDestroy(isrow);CHKERRQ(ierr);
127e240928fSHong Zhang     ierr = ISDestroy(iscol);CHKERRQ(ierr);
128e240928fSHong Zhang 
129*a61c8c0fSHong Zhang     /* attach the supporting struct to P for reuse */
130*a61c8c0fSHong Zhang     ierr = PetscNew(Mat_PtAPMPI,&ptap);CHKERRQ(ierr);
131*a61c8c0fSHong Zhang     ptap->ploc    = ploc;
132*a61c8c0fSHong Zhang     ptap->P_oth   = P_oth;
133*a61c8c0fSHong Zhang     ptap->prstart = prstart;
134*a61c8c0fSHong Zhang     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
135*a61c8c0fSHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
136*a61c8c0fSHong Zhang     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
137*a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
138e240928fSHong Zhang 
139*a61c8c0fSHong Zhang     /* now, compute symbolic product */
140*a61c8c0fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
141*a61c8c0fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
142*a61c8c0fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
143*a61c8c0fSHong Zhang     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
144*a61c8c0fSHong Zhang     if (container) {
145*a61c8c0fSHong Zhang       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
146*a61c8c0fSHong Zhang     } else {
147*a61c8c0fSHong Zhang       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
148*a61c8c0fSHong Zhang     }
149*a61c8c0fSHong Zhang 
150*a61c8c0fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
151*a61c8c0fSHong Zhang     /* improve! */
152*a61c8c0fSHong Zhang     ierr = MatDestroy(ptap->P_oth);CHKERRQ(ierr);
153*a61c8c0fSHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&ptap->P_oth);CHKERRQ(ierr);
154*a61c8c0fSHong Zhang 
155*a61c8c0fSHong Zhang     /* get P_loc by taking all local rows of P */
156*a61c8c0fSHong Zhang     ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
157*a61c8c0fSHong Zhang     ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
158*a61c8c0fSHong Zhang     ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
159*a61c8c0fSHong Zhang     ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_REUSE_MATRIX,&ptap->ploc);CHKERRQ(ierr);
160*a61c8c0fSHong Zhang     ierr = ISDestroy(isrow);CHKERRQ(ierr);
161*a61c8c0fSHong Zhang     ierr = ISDestroy(iscol);CHKERRQ(ierr);
162*a61c8c0fSHong Zhang   } else {
163*a61c8c0fSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
164*a61c8c0fSHong Zhang   }
165*a61c8c0fSHong Zhang   /* now, compute numeric product */
166*a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
167*a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
168*a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
169*a61c8c0fSHong Zhang 
170*a61c8c0fSHong Zhang   PetscFunctionReturn(0);
171*a61c8c0fSHong Zhang }
172*a61c8c0fSHong Zhang 
173*a61c8c0fSHong Zhang #undef __FUNCT__
174*a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
175*a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
176*a61c8c0fSHong Zhang {
177*a61c8c0fSHong Zhang   PetscErrorCode       ierr;
178*a61c8c0fSHong Zhang   Mat                  C_seq;
179*a61c8c0fSHong Zhang   Mat_PtAPMPI          *ptap;
180*a61c8c0fSHong Zhang   PetscObjectContainer container;
181*a61c8c0fSHong Zhang 
182*a61c8c0fSHong Zhang   PetscFunctionBegin;
183*a61c8c0fSHong Zhang 
184*a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
185*a61c8c0fSHong Zhang   if (container) {
186*a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
187*a61c8c0fSHong Zhang   } else {
188*a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
189*a61c8c0fSHong Zhang   }
19025616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
191*a61c8c0fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,fill,&C_seq);CHKERRQ(ierr);
192*a61c8c0fSHong Zhang 
193b90dcfe3SHong Zhang   /* add C_seq into mpi C */
194*a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
195b90dcfe3SHong Zhang 
196ff134f7aSHong Zhang   PetscFunctionReturn(0);
197ff134f7aSHong Zhang }
198ff134f7aSHong Zhang 
199ff134f7aSHong Zhang #undef __FUNCT__
200e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
201b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
202ff134f7aSHong Zhang {
203b90dcfe3SHong Zhang   PetscErrorCode       ierr;
204671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
205*a61c8c0fSHong Zhang   Mat_PtAPMPI          *ptap;
206*a61c8c0fSHong Zhang   PetscObjectContainer cont_merge,cont_ptap;
207ff134f7aSHong Zhang 
208ff134f7aSHong Zhang   PetscFunctionBegin;
209*a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
210*a61c8c0fSHong Zhang   if (cont_merge) {
211*a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
2127f79fd58SMatthew Knepley   } else {
213*a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
214671beff6SHong Zhang   }
215*a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
216*a61c8c0fSHong Zhang   if (cont_ptap) {
217*a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
218*a61c8c0fSHong Zhang   } else {
219*a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
220*a61c8c0fSHong Zhang   }
221*a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->ploc[0],ptap->P_oth,ptap->prstart,merge->C_seq);CHKERRQ(ierr);
222*a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
223ff134f7aSHong Zhang   PetscFunctionReturn(0);
224ff134f7aSHong Zhang }
225ff134f7aSHong Zhang 
226ff134f7aSHong Zhang #undef __FUNCT__
2279af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
228dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
2299af31e4aSHong Zhang {
230dfbe8321SBarry Smith   PetscErrorCode ierr;
231b1d57f15SBarry Smith 
2329af31e4aSHong Zhang   PetscFunctionBegin;
2339af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
234d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2359af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
236d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
2379af31e4aSHong Zhang   }
238d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2399af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
240d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2419af31e4aSHong Zhang   PetscFunctionReturn(0);
2429af31e4aSHong Zhang }
2439af31e4aSHong Zhang 
2446849ba73SBarry Smith /*
2459af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
2464d3841fdSKris Buschelman 
2474d3841fdSKris Buschelman    Collective on Mat
2484d3841fdSKris Buschelman 
2494d3841fdSKris Buschelman    Input Parameters:
2504d3841fdSKris Buschelman +  A - the matrix
2514d3841fdSKris Buschelman -  P - the projection matrix
2524d3841fdSKris Buschelman 
2534d3841fdSKris Buschelman    Output Parameters:
2544d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
2554d3841fdSKris Buschelman 
2564d3841fdSKris Buschelman    Notes:
2574d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
2584d3841fdSKris Buschelman 
2594d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
2604d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
2619af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
2624d3841fdSKris Buschelman 
2634d3841fdSKris Buschelman    Level: intermediate
2644d3841fdSKris Buschelman 
2659af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
2666849ba73SBarry Smith */
267e240928fSHong Zhang #undef __FUNCT__
268e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
26955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
27055a3bba9SHong Zhang {
271dfbe8321SBarry Smith   PetscErrorCode ierr;
272534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
273534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
274eb9c0419SKris Buschelman 
275eb9c0419SKris Buschelman   PetscFunctionBegin;
276eb9c0419SKris Buschelman 
2774482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
278c9780b6fSBarry Smith   PetscValidType(A,1);
279eb9c0419SKris Buschelman   MatPreallocated(A);
280eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
281eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
282eb9c0419SKris Buschelman 
2834482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
284c9780b6fSBarry Smith   PetscValidType(P,2);
285eb9c0419SKris Buschelman   MatPreallocated(P);
286eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
287eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
288eb9c0419SKris Buschelman 
2894482741eSBarry Smith   PetscValidPointer(C,3);
2904482741eSBarry Smith 
291eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
292eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
293eb9c0419SKris Buschelman 
294534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
295534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
296534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
297534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
298534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
299534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
300534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
3014d3841fdSKris Buschelman 
302534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
303534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
304534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
305eb9c0419SKris Buschelman 
306eb9c0419SKris Buschelman   PetscFunctionReturn(0);
307eb9c0419SKris Buschelman }
308eb9c0419SKris Buschelman 
309f747e1aeSHong Zhang typedef struct {
310f747e1aeSHong Zhang   Mat    symAP;
311f747e1aeSHong Zhang } Mat_PtAPstruct;
312f747e1aeSHong Zhang 
31378a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
31478a80504SBarry Smith 
315f747e1aeSHong Zhang #undef __FUNCT__
316f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
317f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
318f747e1aeSHong Zhang {
319f4a850bbSBarry Smith   PetscErrorCode    ierr;
320f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
321f747e1aeSHong Zhang 
322f747e1aeSHong Zhang   PetscFunctionBegin;
323f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
324f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
32578a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
326f747e1aeSHong Zhang   PetscFunctionReturn(0);
327f747e1aeSHong Zhang }
328f747e1aeSHong Zhang 
329eb9c0419SKris Buschelman #undef __FUNCT__
3309af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
33155a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
33255a3bba9SHong Zhang {
333dfbe8321SBarry Smith   PetscErrorCode ierr;
334d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
335d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
33655a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
337b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
33855a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
339b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
340d20bfe6fSHong Zhang   MatScalar      *ca;
34155a3bba9SHong Zhang   PetscBT        lnkbt;
342eb9c0419SKris Buschelman 
343eb9c0419SKris Buschelman   PetscFunctionBegin;
344d20bfe6fSHong Zhang   /* Get ij structure of P^T */
345eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
346d20bfe6fSHong Zhang   ptJ=ptj;
347eb9c0419SKris Buschelman 
348d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
349d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
35055a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
351d20bfe6fSHong Zhang   ci[0] = 0;
352eb9c0419SKris Buschelman 
35355a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
35455a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
355d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
35655a3bba9SHong Zhang 
35755a3bba9SHong Zhang   /* create and initialize a linked list */
35855a3bba9SHong Zhang   nlnk = pn+1;
35955a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
360eb9c0419SKris Buschelman 
361d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
362d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
363d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
364d20bfe6fSHong Zhang   current_space = free_space;
365d20bfe6fSHong Zhang 
366d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
367d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
368d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
369d20bfe6fSHong Zhang     ptanzi = 0;
370d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
371d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
372d20bfe6fSHong Zhang       arow = *ptJ++;
373d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
374d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
375d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
376d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
377d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
378d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
379d20bfe6fSHong Zhang         }
380d20bfe6fSHong Zhang       }
381d20bfe6fSHong Zhang     }
382d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
383d20bfe6fSHong Zhang     ptaj = ptasparserow;
384d20bfe6fSHong Zhang     cnzi   = 0;
385d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
386d20bfe6fSHong Zhang       prow = *ptaj++;
387d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
388d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
38955a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
39055a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
39155a3bba9SHong Zhang       cnzi += nlnk;
392d20bfe6fSHong Zhang     }
393d20bfe6fSHong Zhang 
394d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
395d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
396d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
397d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
398d20bfe6fSHong Zhang     }
399d20bfe6fSHong Zhang 
400d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
40155a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
402d20bfe6fSHong Zhang     current_space->array           += cnzi;
403d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
404d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
405d20bfe6fSHong Zhang 
406d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
407d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
408d20bfe6fSHong Zhang     }
409d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
410d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
411d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
412d20bfe6fSHong Zhang   }
413d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
414d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
415d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
41655a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
417d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
418d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
41955a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
420d20bfe6fSHong Zhang 
421d20bfe6fSHong Zhang   /* Allocate space for ca */
422d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
423d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
424d20bfe6fSHong Zhang 
425d20bfe6fSHong Zhang   /* put together the new matrix */
426d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
427d20bfe6fSHong Zhang 
428d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
429d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
430d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
431d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
432d20bfe6fSHong Zhang   c->nonew    = 0;
433d20bfe6fSHong Zhang 
434d20bfe6fSHong Zhang   /* Clean up. */
435d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
436eb9c0419SKris Buschelman 
437eb9c0419SKris Buschelman   PetscFunctionReturn(0);
438eb9c0419SKris Buschelman }
439eb9c0419SKris Buschelman 
4403985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
4413985e5eaSKris Buschelman EXTERN_C_BEGIN
4423985e5eaSKris Buschelman #undef __FUNCT__
4439af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
44455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
44555a3bba9SHong Zhang {
4465c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
447dfbe8321SBarry Smith   PetscErrorCode ierr;
4483985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4493985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
4503985e5eaSKris Buschelman   Mat            P=pp->AIJ;
4513985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
452b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
453b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
454b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
455b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
4563985e5eaSKris Buschelman   MatScalar      *ca;
4573985e5eaSKris Buschelman 
4583985e5eaSKris Buschelman   PetscFunctionBegin;
4593985e5eaSKris Buschelman   /* Start timer */
4609af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4613985e5eaSKris Buschelman 
4623985e5eaSKris Buschelman   /* Get ij structure of P^T */
4633985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4643985e5eaSKris Buschelman 
4653985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
4663985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
467b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4683985e5eaSKris Buschelman   ci[0] = 0;
4693985e5eaSKris Buschelman 
470b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
471b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4723985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
4733985e5eaSKris Buschelman   denserow     = ptasparserow + an;
4743985e5eaSKris Buschelman   sparserow    = denserow     + pn;
4753985e5eaSKris Buschelman 
4763985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
4773985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
4783985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
4793985e5eaSKris Buschelman   current_space = free_space;
4803985e5eaSKris Buschelman 
4813985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
4823985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
4833985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
4843985e5eaSKris Buschelman     ptanzi = 0;
4853985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4863985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4873985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4883985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4893985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4903985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4913985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4923985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4933985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4943985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4953985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4963985e5eaSKris Buschelman           }
4973985e5eaSKris Buschelman         }
4983985e5eaSKris Buschelman       }
4993985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
5003985e5eaSKris Buschelman       ptaj = ptasparserow;
5013985e5eaSKris Buschelman       cnzi   = 0;
5023985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
503fe05a634SKris Buschelman         pdof = *ptaj%dof;
5043985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
5053985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
5063985e5eaSKris Buschelman         pjj  = pj + pi[prow];
5073985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
508fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
509fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
510fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
5113985e5eaSKris Buschelman           }
5123985e5eaSKris Buschelman         }
5133985e5eaSKris Buschelman       }
5143985e5eaSKris Buschelman 
5153985e5eaSKris Buschelman       /* sort sparserow */
5163985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
5173985e5eaSKris Buschelman 
5183985e5eaSKris Buschelman       /* If free space is not available, make more free space */
5193985e5eaSKris Buschelman       /* Double the amount of total space in the list */
5203985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
5213985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
5223985e5eaSKris Buschelman       }
5233985e5eaSKris Buschelman 
5243985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
525b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
5263985e5eaSKris Buschelman       current_space->array           += cnzi;
5273985e5eaSKris Buschelman       current_space->local_used      += cnzi;
5283985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
5293985e5eaSKris Buschelman 
5303985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
5313985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
5323985e5eaSKris Buschelman       }
5333985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
5343985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
5353985e5eaSKris Buschelman       }
5363985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
5373985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
5383985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
5393985e5eaSKris Buschelman     }
5403985e5eaSKris Buschelman   }
5413985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
5423985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
5433985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
544b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5453985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
5463985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
5473985e5eaSKris Buschelman 
5483985e5eaSKris Buschelman   /* Allocate space for ca */
5493985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
5503985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
5513985e5eaSKris Buschelman 
5523985e5eaSKris Buschelman   /* put together the new matrix */
5533985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
5543985e5eaSKris Buschelman 
5553985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5563985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
5573985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
5583985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
5593985e5eaSKris Buschelman   c->nonew    = 0;
5603985e5eaSKris Buschelman 
5613985e5eaSKris Buschelman   /* Clean up. */
5623985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
5633985e5eaSKris Buschelman 
5649af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
5653985e5eaSKris Buschelman   PetscFunctionReturn(0);
5663985e5eaSKris Buschelman }
5673985e5eaSKris Buschelman EXTERN_C_END
5683985e5eaSKris Buschelman 
5696849ba73SBarry Smith /*
5709af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
5714d3841fdSKris Buschelman 
5724d3841fdSKris Buschelman    Collective on Mat
5734d3841fdSKris Buschelman 
5744d3841fdSKris Buschelman    Input Parameters:
5754d3841fdSKris Buschelman +  A - the matrix
5764d3841fdSKris Buschelman -  P - the projection matrix
5774d3841fdSKris Buschelman 
5784d3841fdSKris Buschelman    Output Parameters:
5794d3841fdSKris Buschelman .  C - the product matrix
5804d3841fdSKris Buschelman 
5814d3841fdSKris Buschelman    Notes:
5829af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
5834d3841fdSKris Buschelman    the user using MatDeatroy().
5844d3841fdSKris Buschelman 
585170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
586170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5874d3841fdSKris Buschelman 
5884d3841fdSKris Buschelman    Level: intermediate
5894d3841fdSKris Buschelman 
5909af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5916849ba73SBarry Smith */
592e240928fSHong Zhang #undef __FUNCT__
593e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
59455a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
59555a3bba9SHong Zhang {
596dfbe8321SBarry Smith   PetscErrorCode ierr;
597534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
598534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
599eb9c0419SKris Buschelman 
600eb9c0419SKris Buschelman   PetscFunctionBegin;
601eb9c0419SKris Buschelman 
6024482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
603c9780b6fSBarry Smith   PetscValidType(A,1);
604eb9c0419SKris Buschelman   MatPreallocated(A);
605eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
606eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
607eb9c0419SKris Buschelman 
6084482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
609c9780b6fSBarry Smith   PetscValidType(P,2);
610eb9c0419SKris Buschelman   MatPreallocated(P);
611eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
612eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
613eb9c0419SKris Buschelman 
6144482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
615c9780b6fSBarry Smith   PetscValidType(C,3);
616eb9c0419SKris Buschelman   MatPreallocated(C);
617eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
618eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
619eb9c0419SKris Buschelman 
620eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
621eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
622eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
623eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
624eb9c0419SKris Buschelman 
625534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
626534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
627534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
628534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
629534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
630534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
631534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
6324d3841fdSKris Buschelman 
633534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
634534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
635534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
636eb9c0419SKris Buschelman 
637eb9c0419SKris Buschelman   PetscFunctionReturn(0);
638eb9c0419SKris Buschelman }
639eb9c0419SKris Buschelman 
640eb9c0419SKris Buschelman #undef __FUNCT__
6419af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
642dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
643dfbe8321SBarry Smith {
644dfbe8321SBarry Smith   PetscErrorCode ierr;
645b1d57f15SBarry Smith   PetscInt       flops=0;
646d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
647d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
648d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
649b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
650b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
651b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
652b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
653d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
654eb9c0419SKris Buschelman 
655eb9c0419SKris Buschelman   PetscFunctionBegin;
656d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
657b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
658b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
659eb9c0419SKris Buschelman 
660b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
661d20bfe6fSHong Zhang   apjdense = apj + cn;
662d20bfe6fSHong Zhang 
663d20bfe6fSHong Zhang   /* Clear old values in C */
664d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
665d20bfe6fSHong Zhang 
666d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
667d20bfe6fSHong Zhang     /* Form sparse row of A*P */
668d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
669d20bfe6fSHong Zhang     apnzj = 0;
670d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
671d20bfe6fSHong Zhang       prow = *aj++;
672d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
673d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
674d20bfe6fSHong Zhang       paj  = pa + pi[prow];
675d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
676d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
677d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
678d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
679d20bfe6fSHong Zhang         }
680d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
681d20bfe6fSHong Zhang       }
682d20bfe6fSHong Zhang       flops += 2*pnzj;
683d20bfe6fSHong Zhang       aa++;
684d20bfe6fSHong Zhang     }
685d20bfe6fSHong Zhang 
686d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
687d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
688d20bfe6fSHong Zhang 
689d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
690d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
691d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
692d20bfe6fSHong Zhang       nextap = 0;
693d20bfe6fSHong Zhang       crow   = *pJ++;
694d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
695d20bfe6fSHong Zhang       caj    = ca + ci[crow];
696d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
697d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
698d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
699d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
700d20bfe6fSHong Zhang         }
701d20bfe6fSHong Zhang       }
702d20bfe6fSHong Zhang       flops += 2*apnzj;
703d20bfe6fSHong Zhang       pA++;
704d20bfe6fSHong Zhang     }
705d20bfe6fSHong Zhang 
706d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
707d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
708d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
709d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
710d20bfe6fSHong Zhang     }
711d20bfe6fSHong Zhang   }
712d20bfe6fSHong Zhang 
713d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
714d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
715d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
716d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
717d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
718d20bfe6fSHong Zhang 
719eb9c0419SKris Buschelman   PetscFunctionReturn(0);
720eb9c0419SKris Buschelman }
7210e36024fSHong Zhang 
722*a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
723e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
724e240928fSHong Zhang #undef __FUNCT__
725e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
726e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
727e240928fSHong Zhang {
728e240928fSHong Zhang   PetscErrorCode ierr;
729e240928fSHong Zhang   PetscInt       flops=0;
730e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
731e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
732e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
733e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
734e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
735e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
736e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
737e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
738e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
739e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
740e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
741e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
742e240928fSHong Zhang 
743e240928fSHong Zhang   PetscFunctionBegin;
744e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
745*a61c8c0fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
746e240928fSHong Zhang   }
747*a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
748e240928fSHong Zhang 
749e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
750e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
751e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
752e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
753e240928fSHong Zhang   apjdense = apj + cn;
754e240928fSHong Zhang 
755e240928fSHong Zhang   /* Clear old values in C */
756e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
757e240928fSHong Zhang 
758e240928fSHong Zhang   for (i=0;i<am;i++) {
759e240928fSHong Zhang     /* Form i-th sparse row of A*P */
760e240928fSHong Zhang      apnzj = 0;
761e240928fSHong Zhang     /* diagonal portion of A */
762e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
763e240928fSHong Zhang     for (j=0;j<anzi;j++) {
764e240928fSHong Zhang       prow = *adj;
765e240928fSHong Zhang       adj++;
766e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
767e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
768e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
769e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
770e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
771e240928fSHong Zhang           apjdense[pjj[k]] = -1;
772e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
773e240928fSHong Zhang         }
774e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
775e240928fSHong Zhang       }
776e240928fSHong Zhang       flops += 2*pnzj;
777e240928fSHong Zhang       ada++;
778e240928fSHong Zhang     }
779e240928fSHong Zhang     /* off-diagonal portion of A */
780e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
781e240928fSHong Zhang     for (j=0;j<anzi;j++) {
782e240928fSHong Zhang       col = a->garray[*aoj];
783e240928fSHong Zhang       if (col < cstart){
784e240928fSHong Zhang         prow = *aoj;
785e240928fSHong Zhang       } else if (col >= cend){
786e240928fSHong Zhang         prow = *aoj;
787e240928fSHong Zhang       } else {
788e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
789e240928fSHong Zhang       }
790e240928fSHong Zhang       aoj++;
791e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
792e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
793e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
794e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
795e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
796e240928fSHong Zhang           apjdense[pjj[k]] = -1;
797e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
798e240928fSHong Zhang         }
799e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
800e240928fSHong Zhang       }
801e240928fSHong Zhang       flops += 2*pnzj;
802e240928fSHong Zhang       aoa++;
803e240928fSHong Zhang     }
804e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
805e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
806e240928fSHong Zhang 
807e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
808e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
809e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
810e240928fSHong Zhang       nextap = 0;
811e240928fSHong Zhang       crow   = *pJ++;
812e240928fSHong Zhang       cjj    = cj + ci[crow];
813e240928fSHong Zhang       caj    = ca + ci[crow];
814e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
815e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
816e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
817e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
818e240928fSHong Zhang         }
819e240928fSHong Zhang       }
820e240928fSHong Zhang       flops += 2*apnzj;
821e240928fSHong Zhang       pA++;
822e240928fSHong Zhang     }
823e240928fSHong Zhang 
824e240928fSHong Zhang     /* Zero the current row info for A*P */
825e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
826e240928fSHong Zhang       apa[apj[j]]      = 0.;
827e240928fSHong Zhang       apjdense[apj[j]] = 0;
828e240928fSHong Zhang     }
829e240928fSHong Zhang   }
830e240928fSHong Zhang 
831e240928fSHong Zhang   /* Assemble the final matrix and clean up */
832e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
833e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
834e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
835e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
836*a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
837e240928fSHong Zhang   PetscFunctionReturn(0);
838e240928fSHong Zhang }
839e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
840e240928fSHong Zhang #undef __FUNCT__
841e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
842e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
843e240928fSHong Zhang {
844e240928fSHong Zhang   PetscErrorCode ierr;
845e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
846e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
847e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
848e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
849e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
850e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
851e240928fSHong Zhang   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
852e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
853e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
854e240928fSHong Zhang   MatScalar      *ca;
855e240928fSHong Zhang   PetscBT        lnkbt;
856e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
857e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
858e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
859e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
860e240928fSHong Zhang 
861e240928fSHong Zhang   PetscFunctionBegin;
862e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
863e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
864e240928fSHong Zhang   }
865e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
866e240928fSHong Zhang 
867e240928fSHong Zhang   prend = prstart + pm;
868e240928fSHong Zhang 
869e240928fSHong Zhang   /* get ij structure of P_loc^T */
870e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
871e240928fSHong Zhang   ptJ=ptj;
872e240928fSHong Zhang 
873e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
874e240928fSHong Zhang   /* free space for accumulating nonzero column info */
875e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
876e240928fSHong Zhang   ci[0] = 0;
877e240928fSHong Zhang 
878e240928fSHong Zhang   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
879e240928fSHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
880e240928fSHong Zhang   ptasparserow_loc = ptadenserow_loc + an;
881e240928fSHong Zhang   ptadenserow_oth  = ptasparserow_loc + an;
882e240928fSHong Zhang   ptasparserow_oth = ptadenserow_oth + an;
883e240928fSHong Zhang 
884e240928fSHong Zhang   /* create and initialize a linked list */
885e240928fSHong Zhang   nlnk = pN+1;
886e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
887e240928fSHong Zhang 
888e240928fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
889e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
890e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
891e240928fSHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
892e240928fSHong Zhang   current_space = free_space;
893e240928fSHong Zhang 
894e240928fSHong Zhang   /* determine symbolic info for each row of C: */
895e240928fSHong Zhang   for (i=0;i<pN;i++) {
896e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
897e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
898e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
899e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
900e240928fSHong Zhang       arow = *ptJ++;
901e240928fSHong Zhang       /* diagonal portion of A */
902e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
903e240928fSHong Zhang       ajj  = adj + adi[arow];
904e240928fSHong Zhang       for (k=0;k<anzj;k++) {
905e240928fSHong Zhang         col = ajj[k]+prstart;
906e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
907e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
908e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
909e240928fSHong Zhang         }
910e240928fSHong Zhang       }
911e240928fSHong Zhang       /* off-diagonal portion of A */
912e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
913e240928fSHong Zhang       ajj  = aoj + aoi[arow];
914e240928fSHong Zhang       for (k=0;k<anzj;k++) {
915e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
916e240928fSHong Zhang         if (col < cstart){
917e240928fSHong Zhang           col = ajj[k];
918e240928fSHong Zhang         } else if (col >= cend){
919e240928fSHong Zhang           col = ajj[k] + pm;
920e240928fSHong Zhang         } else {
921e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
922e240928fSHong Zhang         }
923e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
924e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
925e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
926e240928fSHong Zhang         }
927e240928fSHong Zhang       }
928e240928fSHong Zhang     }
929e240928fSHong Zhang 
930e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
931e240928fSHong Zhang     cnzi   = 0;
932e240928fSHong Zhang     /* rows of P_loc */
933e240928fSHong Zhang     ptaj = ptasparserow_loc;
934e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
935e240928fSHong Zhang       prow = *ptaj++;
936e240928fSHong Zhang       prow -= prstart; /* rm */
937e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
938e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
939e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
940e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
941e240928fSHong Zhang       cnzi += nlnk;
942e240928fSHong Zhang     }
943e240928fSHong Zhang     /* rows of P_oth */
944e240928fSHong Zhang     ptaj = ptasparserow_oth;
945e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
946e240928fSHong Zhang       prow = *ptaj++;
947e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
948e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
949e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
950e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
951e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
952e240928fSHong Zhang       cnzi += nlnk;
953e240928fSHong Zhang     }
954e240928fSHong Zhang 
955e240928fSHong Zhang     /* If free space is not available, make more free space */
956e240928fSHong Zhang     /* Double the amount of total space in the list */
957e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
958e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
959e240928fSHong Zhang     }
960e240928fSHong Zhang 
961e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
962e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
963e240928fSHong Zhang     current_space->array           += cnzi;
964e240928fSHong Zhang     current_space->local_used      += cnzi;
965e240928fSHong Zhang     current_space->local_remaining -= cnzi;
966e240928fSHong Zhang 
967e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
968e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
969e240928fSHong Zhang     }
970e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
971e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
972e240928fSHong Zhang     }
973e240928fSHong Zhang 
974e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
975e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
976e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
977e240928fSHong Zhang   }
978e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
979e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
980e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
981e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
982e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
983e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
984e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
985e240928fSHong Zhang 
986e240928fSHong Zhang   /* Allocate space for ca */
987e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
988e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
989e240928fSHong Zhang 
990e240928fSHong Zhang   /* put together the new matrix */
991e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
992e240928fSHong Zhang 
993e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
994e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
995e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
996e240928fSHong Zhang   c->freedata = PETSC_TRUE;
997e240928fSHong Zhang   c->nonew    = 0;
998e240928fSHong Zhang 
999e240928fSHong Zhang   /* Clean up. */
1000e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1001*a61c8c0fSHong Zhang 
1002e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1003e240928fSHong Zhang   PetscFunctionReturn(0);
1004e240928fSHong Zhang }
1005