xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 429d309be48c0aee64a78f02eb86154797cb536a)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
740390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
750390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
760390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat);
7722e3da61SHong Zhang 
78e240928fSHong Zhang 
79e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
80e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
81e240928fSHong Zhang 
82eb9c0419SKris Buschelman #undef __FUNCT__
83ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
84ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
85ff134f7aSHong Zhang {
86ff134f7aSHong Zhang   PetscErrorCode    ierr;
87b90dcfe3SHong Zhang 
88b90dcfe3SHong Zhang   PetscFunctionBegin;
89b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
904c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
91b90dcfe3SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
924c627768SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
93e240928fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */
944c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
95e240928fSHong Zhang     ierr = MatDestroy(*C);CHKERRQ(ierr);
96e240928fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);
97e240928fSHong Zhang 
98e240928fSHong Zhang     /*
99b90dcfe3SHong Zhang     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
100e240928fSHong Zhang     */
1014c627768SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
102b90dcfe3SHong Zhang   } else {
103b90dcfe3SHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
104b90dcfe3SHong Zhang   }
105b90dcfe3SHong Zhang   PetscFunctionReturn(0);
106b90dcfe3SHong Zhang }
107e240928fSHong Zhang #define NEW
108b90dcfe3SHong Zhang #undef __FUNCT__
109e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
110b90dcfe3SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
111b90dcfe3SHong Zhang {
112b90dcfe3SHong Zhang   PetscErrorCode    ierr;
113*429d309bSHong Zhang   Mat               C_seq;
114e240928fSHong Zhang   Mat               P_loc,P_oth,*ploc;
115e240928fSHong Zhang   PetscInt          prstart,prend,m=P->m,low;
116e240928fSHong Zhang   IS                isrow,iscol;
117ff134f7aSHong Zhang 
118ff134f7aSHong Zhang   PetscFunctionBegin;
119e240928fSHong Zhang   /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
120*429d309bSHong Zhang   ierr = MatGetBrowsOfAoCols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr);
121e240928fSHong Zhang   prend = prstart + m;
122e240928fSHong Zhang   /* get P_loc by taking all local rows of P */
123e240928fSHong Zhang   ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
124e240928fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
125e240928fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
126e240928fSHong Zhang   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr);
127e240928fSHong Zhang   P_loc = ploc[0];
128e240928fSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
129e240928fSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
130e240928fSHong Zhang 
131e240928fSHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
132e240928fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr);
133e240928fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr);
134e240928fSHong Zhang 
135e240928fSHong Zhang   ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr);
136e240928fSHong Zhang   ierr = MatDestroy(P_oth);CHKERRQ(ierr);
137e240928fSHong Zhang #ifdef OLD
138*429d309bSHong Zhang   Mat               P_seq;
13925616d81SHong Zhang   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
14022e3da61SHong Zhang   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
14125616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
142ff134f7aSHong Zhang   prend  = prstart + m;
1430390132cSHong Zhang   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
14425616d81SHong Zhang   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
145e240928fSHong Zhang #endif
146b90dcfe3SHong Zhang   /* add C_seq into mpi C */
147b90dcfe3SHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
148b90dcfe3SHong Zhang 
149ff134f7aSHong Zhang   PetscFunctionReturn(0);
150ff134f7aSHong Zhang }
151ff134f7aSHong Zhang 
152ff134f7aSHong Zhang #undef __FUNCT__
153e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
154b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
155ff134f7aSHong Zhang {
156b90dcfe3SHong Zhang   PetscErrorCode       ierr;
1570390132cSHong Zhang   Mat                  P_seq,C_seq;
158b1d57f15SBarry Smith   PetscInt             prstart,prend,m=P->m;
159671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
160671beff6SHong Zhang   PetscObjectContainer container;
161ff134f7aSHong Zhang 
162ff134f7aSHong Zhang   PetscFunctionBegin;
163671beff6SHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
164671beff6SHong Zhang   if (container) {
1657f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
1667f79fd58SMatthew Knepley   } else {
1677f79fd58SMatthew Knepley     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container");
168671beff6SHong Zhang   }
169671beff6SHong Zhang 
170b90dcfe3SHong Zhang   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
1710390132cSHong Zhang   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
172ff134f7aSHong Zhang 
173b90dcfe3SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
174b90dcfe3SHong Zhang   prend = prstart + m;
175b90dcfe3SHong Zhang   C_seq = merge->C_seq;
1760390132cSHong Zhang   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
177b90dcfe3SHong Zhang   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
178b90dcfe3SHong Zhang 
179b90dcfe3SHong Zhang   /* add C_seq into mpi C */
180b90dcfe3SHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
181b90dcfe3SHong Zhang 
182ff134f7aSHong Zhang   PetscFunctionReturn(0);
183ff134f7aSHong Zhang }
184ff134f7aSHong Zhang 
185ff134f7aSHong Zhang #undef __FUNCT__
1869af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
187dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
1889af31e4aSHong Zhang {
189dfbe8321SBarry Smith   PetscErrorCode ierr;
190b1d57f15SBarry Smith 
1919af31e4aSHong Zhang   PetscFunctionBegin;
1929af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
193d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1949af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
195d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1969af31e4aSHong Zhang   }
197d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1989af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
199d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
2009af31e4aSHong Zhang   PetscFunctionReturn(0);
2019af31e4aSHong Zhang }
2029af31e4aSHong Zhang 
2036849ba73SBarry Smith /*
2049af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
2054d3841fdSKris Buschelman 
2064d3841fdSKris Buschelman    Collective on Mat
2074d3841fdSKris Buschelman 
2084d3841fdSKris Buschelman    Input Parameters:
2094d3841fdSKris Buschelman +  A - the matrix
2104d3841fdSKris Buschelman -  P - the projection matrix
2114d3841fdSKris Buschelman 
2124d3841fdSKris Buschelman    Output Parameters:
2134d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
2144d3841fdSKris Buschelman 
2154d3841fdSKris Buschelman    Notes:
2164d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
2174d3841fdSKris Buschelman 
2184d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
2194d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
2209af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
2214d3841fdSKris Buschelman 
2224d3841fdSKris Buschelman    Level: intermediate
2234d3841fdSKris Buschelman 
2249af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
2256849ba73SBarry Smith */
226e240928fSHong Zhang #undef __FUNCT__
227e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
22855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
22955a3bba9SHong Zhang {
230dfbe8321SBarry Smith   PetscErrorCode ierr;
231534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
232534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
233eb9c0419SKris Buschelman 
234eb9c0419SKris Buschelman   PetscFunctionBegin;
235eb9c0419SKris Buschelman 
2364482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
237c9780b6fSBarry Smith   PetscValidType(A,1);
238eb9c0419SKris Buschelman   MatPreallocated(A);
239eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
240eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
241eb9c0419SKris Buschelman 
2424482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
243c9780b6fSBarry Smith   PetscValidType(P,2);
244eb9c0419SKris Buschelman   MatPreallocated(P);
245eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
246eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
247eb9c0419SKris Buschelman 
2484482741eSBarry Smith   PetscValidPointer(C,3);
2494482741eSBarry Smith 
250eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
251eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
252eb9c0419SKris Buschelman 
253534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
254534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
255534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
256534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
257534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
258534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
259534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2604d3841fdSKris Buschelman 
261534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
262534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
263534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
264eb9c0419SKris Buschelman 
265eb9c0419SKris Buschelman   PetscFunctionReturn(0);
266eb9c0419SKris Buschelman }
267eb9c0419SKris Buschelman 
268f747e1aeSHong Zhang typedef struct {
269f747e1aeSHong Zhang   Mat    symAP;
270f747e1aeSHong Zhang } Mat_PtAPstruct;
271f747e1aeSHong Zhang 
27278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
27378a80504SBarry Smith 
274f747e1aeSHong Zhang #undef __FUNCT__
275f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
276f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
277f747e1aeSHong Zhang {
278f4a850bbSBarry Smith   PetscErrorCode    ierr;
279f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
280f747e1aeSHong Zhang 
281f747e1aeSHong Zhang   PetscFunctionBegin;
282f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
283f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
28478a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
285f747e1aeSHong Zhang   PetscFunctionReturn(0);
286f747e1aeSHong Zhang }
287f747e1aeSHong Zhang 
288eb9c0419SKris Buschelman #undef __FUNCT__
2899af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
29055a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
29155a3bba9SHong Zhang {
292dfbe8321SBarry Smith   PetscErrorCode ierr;
293d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
294d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
29555a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
296b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
29755a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
298b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
299d20bfe6fSHong Zhang   MatScalar      *ca;
30055a3bba9SHong Zhang   PetscBT        lnkbt;
301eb9c0419SKris Buschelman 
302eb9c0419SKris Buschelman   PetscFunctionBegin;
303d20bfe6fSHong Zhang   /* Get ij structure of P^T */
304eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
305d20bfe6fSHong Zhang   ptJ=ptj;
306eb9c0419SKris Buschelman 
307d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
308d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
30955a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
310d20bfe6fSHong Zhang   ci[0] = 0;
311eb9c0419SKris Buschelman 
31255a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
31355a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
314d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
31555a3bba9SHong Zhang 
31655a3bba9SHong Zhang   /* create and initialize a linked list */
31755a3bba9SHong Zhang   nlnk = pn+1;
31855a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
319eb9c0419SKris Buschelman 
320d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
321d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
322d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
323d20bfe6fSHong Zhang   current_space = free_space;
324d20bfe6fSHong Zhang 
325d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
326d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
327d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
328d20bfe6fSHong Zhang     ptanzi = 0;
329d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
330d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
331d20bfe6fSHong Zhang       arow = *ptJ++;
332d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
333d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
334d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
335d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
336d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
337d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
338d20bfe6fSHong Zhang         }
339d20bfe6fSHong Zhang       }
340d20bfe6fSHong Zhang     }
341d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
342d20bfe6fSHong Zhang     ptaj = ptasparserow;
343d20bfe6fSHong Zhang     cnzi   = 0;
344d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
345d20bfe6fSHong Zhang       prow = *ptaj++;
346d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
347d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
34855a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
34955a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
35055a3bba9SHong Zhang       cnzi += nlnk;
351d20bfe6fSHong Zhang     }
352d20bfe6fSHong Zhang 
353d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
354d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
355d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
356d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
357d20bfe6fSHong Zhang     }
358d20bfe6fSHong Zhang 
359d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
36055a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
361d20bfe6fSHong Zhang     current_space->array           += cnzi;
362d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
363d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
364d20bfe6fSHong Zhang 
365d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
366d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
367d20bfe6fSHong Zhang     }
368d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
369d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
370d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
371d20bfe6fSHong Zhang   }
372d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
373d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
374d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
37555a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
376d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
377d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
37855a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
379d20bfe6fSHong Zhang 
380d20bfe6fSHong Zhang   /* Allocate space for ca */
381d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
382d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
383d20bfe6fSHong Zhang 
384d20bfe6fSHong Zhang   /* put together the new matrix */
385d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
386d20bfe6fSHong Zhang 
387d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
388d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
389d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
390d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
391d20bfe6fSHong Zhang   c->nonew    = 0;
392d20bfe6fSHong Zhang 
393d20bfe6fSHong Zhang   /* Clean up. */
394d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
395eb9c0419SKris Buschelman 
396eb9c0419SKris Buschelman   PetscFunctionReturn(0);
397eb9c0419SKris Buschelman }
398eb9c0419SKris Buschelman 
3993985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
4003985e5eaSKris Buschelman EXTERN_C_BEGIN
4013985e5eaSKris Buschelman #undef __FUNCT__
4029af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
40355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
40455a3bba9SHong Zhang {
4055c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
406dfbe8321SBarry Smith   PetscErrorCode ierr;
4073985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4083985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
4093985e5eaSKris Buschelman   Mat            P=pp->AIJ;
4103985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
411b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
412b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
413b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
414b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
4153985e5eaSKris Buschelman   MatScalar      *ca;
4163985e5eaSKris Buschelman 
4173985e5eaSKris Buschelman   PetscFunctionBegin;
4183985e5eaSKris Buschelman   /* Start timer */
4199af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4203985e5eaSKris Buschelman 
4213985e5eaSKris Buschelman   /* Get ij structure of P^T */
4223985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4233985e5eaSKris Buschelman 
4243985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
4253985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
426b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4273985e5eaSKris Buschelman   ci[0] = 0;
4283985e5eaSKris Buschelman 
429b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
430b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4313985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
4323985e5eaSKris Buschelman   denserow     = ptasparserow + an;
4333985e5eaSKris Buschelman   sparserow    = denserow     + pn;
4343985e5eaSKris Buschelman 
4353985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
4363985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
4373985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
4383985e5eaSKris Buschelman   current_space = free_space;
4393985e5eaSKris Buschelman 
4403985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
4413985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
4423985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
4433985e5eaSKris Buschelman     ptanzi = 0;
4443985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4453985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4463985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4473985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4483985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4493985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4503985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4513985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4523985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4533985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4543985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4553985e5eaSKris Buschelman           }
4563985e5eaSKris Buschelman         }
4573985e5eaSKris Buschelman       }
4583985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4593985e5eaSKris Buschelman       ptaj = ptasparserow;
4603985e5eaSKris Buschelman       cnzi   = 0;
4613985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
462fe05a634SKris Buschelman         pdof = *ptaj%dof;
4633985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4643985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4653985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4663985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
467fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
468fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
469fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4703985e5eaSKris Buschelman           }
4713985e5eaSKris Buschelman         }
4723985e5eaSKris Buschelman       }
4733985e5eaSKris Buschelman 
4743985e5eaSKris Buschelman       /* sort sparserow */
4753985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
4763985e5eaSKris Buschelman 
4773985e5eaSKris Buschelman       /* If free space is not available, make more free space */
4783985e5eaSKris Buschelman       /* Double the amount of total space in the list */
4793985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
4803985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4813985e5eaSKris Buschelman       }
4823985e5eaSKris Buschelman 
4833985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
484b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
4853985e5eaSKris Buschelman       current_space->array           += cnzi;
4863985e5eaSKris Buschelman       current_space->local_used      += cnzi;
4873985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
4883985e5eaSKris Buschelman 
4893985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
4903985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
4913985e5eaSKris Buschelman       }
4923985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
4933985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
4943985e5eaSKris Buschelman       }
4953985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
4963985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
4973985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
4983985e5eaSKris Buschelman     }
4993985e5eaSKris Buschelman   }
5003985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
5013985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
5023985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
503b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5043985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
5053985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
5063985e5eaSKris Buschelman 
5073985e5eaSKris Buschelman   /* Allocate space for ca */
5083985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
5093985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
5103985e5eaSKris Buschelman 
5113985e5eaSKris Buschelman   /* put together the new matrix */
5123985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
5133985e5eaSKris Buschelman 
5143985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5153985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
5163985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
5173985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
5183985e5eaSKris Buschelman   c->nonew    = 0;
5193985e5eaSKris Buschelman 
5203985e5eaSKris Buschelman   /* Clean up. */
5213985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
5223985e5eaSKris Buschelman 
5239af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
5243985e5eaSKris Buschelman   PetscFunctionReturn(0);
5253985e5eaSKris Buschelman }
5263985e5eaSKris Buschelman EXTERN_C_END
5273985e5eaSKris Buschelman 
5286849ba73SBarry Smith /*
5299af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
5304d3841fdSKris Buschelman 
5314d3841fdSKris Buschelman    Collective on Mat
5324d3841fdSKris Buschelman 
5334d3841fdSKris Buschelman    Input Parameters:
5344d3841fdSKris Buschelman +  A - the matrix
5354d3841fdSKris Buschelman -  P - the projection matrix
5364d3841fdSKris Buschelman 
5374d3841fdSKris Buschelman    Output Parameters:
5384d3841fdSKris Buschelman .  C - the product matrix
5394d3841fdSKris Buschelman 
5404d3841fdSKris Buschelman    Notes:
5419af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
5424d3841fdSKris Buschelman    the user using MatDeatroy().
5434d3841fdSKris Buschelman 
544170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
545170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5464d3841fdSKris Buschelman 
5474d3841fdSKris Buschelman    Level: intermediate
5484d3841fdSKris Buschelman 
5499af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5506849ba73SBarry Smith */
551e240928fSHong Zhang #undef __FUNCT__
552e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
55355a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
55455a3bba9SHong Zhang {
555dfbe8321SBarry Smith   PetscErrorCode ierr;
556534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
557534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
558eb9c0419SKris Buschelman 
559eb9c0419SKris Buschelman   PetscFunctionBegin;
560eb9c0419SKris Buschelman 
5614482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
562c9780b6fSBarry Smith   PetscValidType(A,1);
563eb9c0419SKris Buschelman   MatPreallocated(A);
564eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
565eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
566eb9c0419SKris Buschelman 
5674482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
568c9780b6fSBarry Smith   PetscValidType(P,2);
569eb9c0419SKris Buschelman   MatPreallocated(P);
570eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
571eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
572eb9c0419SKris Buschelman 
5734482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
574c9780b6fSBarry Smith   PetscValidType(C,3);
575eb9c0419SKris Buschelman   MatPreallocated(C);
576eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
577eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
578eb9c0419SKris Buschelman 
579eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
580eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
581eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
582eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
583eb9c0419SKris Buschelman 
584534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
585534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
586534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
587534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
588534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
589534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
590534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5914d3841fdSKris Buschelman 
592534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
593534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
594534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
595eb9c0419SKris Buschelman 
596eb9c0419SKris Buschelman   PetscFunctionReturn(0);
597eb9c0419SKris Buschelman }
598eb9c0419SKris Buschelman 
599eb9c0419SKris Buschelman #undef __FUNCT__
6009af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
601dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
602dfbe8321SBarry Smith {
603dfbe8321SBarry Smith   PetscErrorCode ierr;
604b1d57f15SBarry Smith   PetscInt       flops=0;
605d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
606d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
607d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
608b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
609b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
610b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
611b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
612d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
613eb9c0419SKris Buschelman 
614eb9c0419SKris Buschelman   PetscFunctionBegin;
615d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
616b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
617b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
618eb9c0419SKris Buschelman 
619b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
620d20bfe6fSHong Zhang   apjdense = apj + cn;
621d20bfe6fSHong Zhang 
622d20bfe6fSHong Zhang   /* Clear old values in C */
623d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
624d20bfe6fSHong Zhang 
625d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
626d20bfe6fSHong Zhang     /* Form sparse row of A*P */
627d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
628d20bfe6fSHong Zhang     apnzj = 0;
629d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
630d20bfe6fSHong Zhang       prow = *aj++;
631d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
632d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
633d20bfe6fSHong Zhang       paj  = pa + pi[prow];
634d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
635d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
636d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
637d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
638d20bfe6fSHong Zhang         }
639d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
640d20bfe6fSHong Zhang       }
641d20bfe6fSHong Zhang       flops += 2*pnzj;
642d20bfe6fSHong Zhang       aa++;
643d20bfe6fSHong Zhang     }
644d20bfe6fSHong Zhang 
645d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
646d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
647d20bfe6fSHong Zhang 
648d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
649d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
650d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
651d20bfe6fSHong Zhang       nextap = 0;
652d20bfe6fSHong Zhang       crow   = *pJ++;
653d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
654d20bfe6fSHong Zhang       caj    = ca + ci[crow];
655d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
656d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
657d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
658d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
659d20bfe6fSHong Zhang         }
660d20bfe6fSHong Zhang       }
661d20bfe6fSHong Zhang       flops += 2*apnzj;
662d20bfe6fSHong Zhang       pA++;
663d20bfe6fSHong Zhang     }
664d20bfe6fSHong Zhang 
665d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
666d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
667d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
668d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
669d20bfe6fSHong Zhang     }
670d20bfe6fSHong Zhang   }
671d20bfe6fSHong Zhang 
672d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
673d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
674d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
675d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
676d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
677d20bfe6fSHong Zhang 
678eb9c0419SKris Buschelman   PetscFunctionReturn(0);
679eb9c0419SKris Buschelman }
6800e36024fSHong Zhang 
6810390132cSHong Zhang /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
6820e36024fSHong Zhang 
6830e36024fSHong Zhang #undef __FUNCT__
6840390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ"
6850390132cSHong Zhang PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
68622e3da61SHong Zhang {
68722e3da61SHong Zhang   PetscErrorCode ierr;
68822e3da61SHong Zhang   PetscInt       flops=0;
68922e3da61SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
69022e3da61SHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
69122e3da61SHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
69222e3da61SHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
69322e3da61SHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
69422e3da61SHong Zhang   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
69522e3da61SHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
69622e3da61SHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
69722e3da61SHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
69822e3da61SHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
69922e3da61SHong Zhang 
70022e3da61SHong Zhang   PetscFunctionBegin;
70122e3da61SHong Zhang   pA=p->a+pi[prstart];
70222e3da61SHong Zhang   /* Allocate temporary array for storage of one row of A*P */
70322e3da61SHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
70422e3da61SHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
70522e3da61SHong Zhang 
70622e3da61SHong Zhang   apj      = (PetscInt *)(apa + cn);
70722e3da61SHong Zhang   apjdense = apj + cn;
70822e3da61SHong Zhang 
70922e3da61SHong Zhang   /* Clear old values in C */
71022e3da61SHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
71122e3da61SHong Zhang 
71222e3da61SHong Zhang   for (i=0;i<am;i++) {
71322e3da61SHong Zhang     /* Form sparse row of A*P */
71422e3da61SHong Zhang     /* diagonal portion of A */
71522e3da61SHong Zhang     anzi  = adi[i+1] - adi[i];
71622e3da61SHong Zhang     apnzj = 0;
71722e3da61SHong Zhang     for (j=0;j<anzi;j++) {
71822e3da61SHong Zhang       prow = *adj + prstart;
71922e3da61SHong Zhang       adj++;
72022e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
72122e3da61SHong Zhang       pjj  = pj + pi[prow];
72222e3da61SHong Zhang       paj  = pa + pi[prow];
72322e3da61SHong Zhang       for (k=0;k<pnzj;k++) {
72422e3da61SHong Zhang         if (!apjdense[pjj[k]]) {
72522e3da61SHong Zhang           apjdense[pjj[k]] = -1;
72622e3da61SHong Zhang           apj[apnzj++]     = pjj[k];
72722e3da61SHong Zhang         }
72822e3da61SHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
72922e3da61SHong Zhang       }
73022e3da61SHong Zhang       flops += 2*pnzj;
73122e3da61SHong Zhang       ada++;
73222e3da61SHong Zhang     }
73322e3da61SHong Zhang     /* off-diagonal portion of A */
73422e3da61SHong Zhang     anzi  = aoi[i+1] - aoi[i];
73522e3da61SHong Zhang     for (j=0;j<anzi;j++) {
73622e3da61SHong Zhang       col = a->garray[*aoj];
73722e3da61SHong Zhang       if (col < cstart){
73822e3da61SHong Zhang         prow = *aoj;
73922e3da61SHong Zhang       } else if (col >= cend){
74022e3da61SHong Zhang         prow = *aoj + prend-prstart;
74122e3da61SHong Zhang       } else {
74222e3da61SHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
74322e3da61SHong Zhang       }
74422e3da61SHong Zhang       aoj++;
74522e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
74622e3da61SHong Zhang       pjj  = pj + pi[prow];
74722e3da61SHong Zhang       paj  = pa + pi[prow];
74822e3da61SHong Zhang       for (k=0;k<pnzj;k++) {
74922e3da61SHong Zhang         if (!apjdense[pjj[k]]) {
75022e3da61SHong Zhang           apjdense[pjj[k]] = -1;
75122e3da61SHong Zhang           apj[apnzj++]     = pjj[k];
75222e3da61SHong Zhang         }
75322e3da61SHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
75422e3da61SHong Zhang       }
75522e3da61SHong Zhang       flops += 2*pnzj;
75622e3da61SHong Zhang       aoa++;
75722e3da61SHong Zhang     }
75822e3da61SHong Zhang 
75922e3da61SHong Zhang     /* Sort the j index array for quick sparse axpy. */
76022e3da61SHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
76122e3da61SHong Zhang 
76222e3da61SHong Zhang     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
76322e3da61SHong Zhang     pnzi = pi[i+1+prstart] - pi[i+prstart];
76422e3da61SHong Zhang     for (j=0;j<pnzi;j++) {
76522e3da61SHong Zhang       nextap = 0;
76622e3da61SHong Zhang       crow   = *pJ++;
76722e3da61SHong Zhang       cjj    = cj + ci[crow];
76822e3da61SHong Zhang       caj    = ca + ci[crow];
76922e3da61SHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
77022e3da61SHong Zhang       for (k=0;nextap<apnzj;k++) {
77122e3da61SHong Zhang         if (cjj[k]==apj[nextap]) {
77222e3da61SHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
77322e3da61SHong Zhang         }
77422e3da61SHong Zhang       }
77522e3da61SHong Zhang       flops += 2*apnzj;
77622e3da61SHong Zhang       pA++;
77722e3da61SHong Zhang     }
77822e3da61SHong Zhang 
77922e3da61SHong Zhang     /* Zero the current row info for A*P */
78022e3da61SHong Zhang     for (j=0;j<apnzj;j++) {
78122e3da61SHong Zhang       apa[apj[j]]      = 0.;
78222e3da61SHong Zhang       apjdense[apj[j]] = 0;
78322e3da61SHong Zhang     }
78422e3da61SHong Zhang   }
78522e3da61SHong Zhang 
78622e3da61SHong Zhang   /* Assemble the final matrix and clean up */
78722e3da61SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
78822e3da61SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
78922e3da61SHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
79022e3da61SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
79122e3da61SHong Zhang 
79222e3da61SHong Zhang   PetscFunctionReturn(0);
79322e3da61SHong Zhang }
79422e3da61SHong Zhang 
79522e3da61SHong Zhang #undef __FUNCT__
7960390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ"
7970390132cSHong Zhang PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
79822e3da61SHong Zhang {
79922e3da61SHong Zhang   PetscErrorCode ierr;
80022e3da61SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
80122e3da61SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
80222e3da61SHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
80322e3da61SHong Zhang   Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data,*c;
80422e3da61SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj;
80522e3da61SHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
80622e3da61SHong Zhang   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
80722e3da61SHong Zhang   PetscInt       an=A->N,am=A->m,pn=P->N,pm=P->M;
80822e3da61SHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
80922e3da61SHong Zhang   PetscInt       m=prend-prstart,nlnk,*lnk;
81022e3da61SHong Zhang   MatScalar      *ca;
81122e3da61SHong Zhang   PetscBT        lnkbt;
81222e3da61SHong Zhang 
81322e3da61SHong Zhang   PetscFunctionBegin;
814e240928fSHong Zhang   int rank;
815e240928fSHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
81622e3da61SHong Zhang 
81722e3da61SHong Zhang   /* Get ij structure of P[rstart:rend,:]^T */
818e462e02cSHong Zhang   ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr);
81922e3da61SHong Zhang   ptJ=ptj;
82022e3da61SHong Zhang 
82122e3da61SHong Zhang   /* Allocate ci array, arrays for fill computation and */
82222e3da61SHong Zhang   /* free space for accumulating nonzero column info */
82322e3da61SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
82422e3da61SHong Zhang   ci[0] = 0;
82522e3da61SHong Zhang 
82622e3da61SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
82722e3da61SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
82822e3da61SHong Zhang   ptasparserow = ptadenserow  + an;
82922e3da61SHong Zhang 
83022e3da61SHong Zhang   /* create and initialize a linked list */
83122e3da61SHong Zhang   nlnk = pn+1;
83222e3da61SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
83322e3da61SHong Zhang 
83422e3da61SHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
83522e3da61SHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
83622e3da61SHong Zhang   nnz           = adi[am] + aoi[am];
83722e3da61SHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space);
83822e3da61SHong Zhang   current_space = free_space;
83922e3da61SHong Zhang 
84022e3da61SHong Zhang   /* Determine symbolic info for each row of C: */
84122e3da61SHong Zhang   for (i=0;i<pn;i++) {
84222e3da61SHong Zhang     ptnzi  = pti[i+1] - pti[i];
84322e3da61SHong Zhang     ptanzi = 0;
84422e3da61SHong Zhang     /* Determine symbolic row of PtA_reduced: */
84522e3da61SHong Zhang     for (j=0;j<ptnzi;j++) {
84622e3da61SHong Zhang       arow = *ptJ++;
847e240928fSHong Zhang       if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow);
84822e3da61SHong Zhang       /* diagonal portion of A */
84922e3da61SHong Zhang       anzj = adi[arow+1] - adi[arow];
85022e3da61SHong Zhang       ajj  = adj + adi[arow];
85122e3da61SHong Zhang       for (k=0;k<anzj;k++) {
85222e3da61SHong Zhang         col = ajj[k]+prstart;
85322e3da61SHong Zhang         if (!ptadenserow[col]) {
85422e3da61SHong Zhang           ptadenserow[col]    = -1;
85522e3da61SHong Zhang           ptasparserow[ptanzi++] = col;
85622e3da61SHong Zhang         }
85722e3da61SHong Zhang       }
85822e3da61SHong Zhang       /* off-diagonal portion of A */
85922e3da61SHong Zhang       anzj = aoi[arow+1] - aoi[arow];
86022e3da61SHong Zhang       ajj  = aoj + aoi[arow];
86122e3da61SHong Zhang       for (k=0;k<anzj;k++) {
86222e3da61SHong Zhang         col = a->garray[ajj[k]];  /* global col */
86322e3da61SHong Zhang         if (col < cstart){
86422e3da61SHong Zhang           col = ajj[k];
86522e3da61SHong Zhang         } else if (col >= cend){
86622e3da61SHong Zhang           col = ajj[k] + m;
86722e3da61SHong Zhang         } else {
86822e3da61SHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
86922e3da61SHong Zhang         }
87022e3da61SHong Zhang         if (!ptadenserow[col]) {
87122e3da61SHong Zhang           ptadenserow[col]    = -1;
87222e3da61SHong Zhang           ptasparserow[ptanzi++] = col;
87322e3da61SHong Zhang         }
87422e3da61SHong Zhang       }
87522e3da61SHong Zhang     }
876e240928fSHong Zhang 
877e240928fSHong Zhang     printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi);
87822e3da61SHong Zhang     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
87922e3da61SHong Zhang     ptaj = ptasparserow;
88022e3da61SHong Zhang     cnzi   = 0;
88122e3da61SHong Zhang     for (j=0;j<ptanzi;j++) {
88222e3da61SHong Zhang       prow = *ptaj++;
88322e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
88422e3da61SHong Zhang       pjj  = pj + pi[prow];
88522e3da61SHong Zhang       /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */
88622e3da61SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
88722e3da61SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
88822e3da61SHong Zhang       cnzi += nlnk;
88922e3da61SHong Zhang     }
89022e3da61SHong Zhang 
89122e3da61SHong Zhang     /* If free space is not available, make more free space */
89222e3da61SHong Zhang     /* Double the amount of total space in the list */
89322e3da61SHong Zhang     if (current_space->local_remaining<cnzi) {
89422e3da61SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
89522e3da61SHong Zhang     }
89622e3da61SHong Zhang 
89722e3da61SHong Zhang     /* Copy data into free space, and zero out denserows */
89822e3da61SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
89922e3da61SHong Zhang     current_space->array           += cnzi;
90022e3da61SHong Zhang     current_space->local_used      += cnzi;
90122e3da61SHong Zhang     current_space->local_remaining -= cnzi;
90222e3da61SHong Zhang 
90322e3da61SHong Zhang     for (j=0;j<ptanzi;j++) {
90422e3da61SHong Zhang       ptadenserow[ptasparserow[j]] = 0;
90522e3da61SHong Zhang     }
90622e3da61SHong Zhang 
90722e3da61SHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
90822e3da61SHong Zhang     /*        For now, we will recompute what is needed. */
90922e3da61SHong Zhang     ci[i+1] = ci[i] + cnzi;
91022e3da61SHong Zhang   }
91122e3da61SHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
91222e3da61SHong Zhang   /* Allocate space for cj, initialize cj, and */
91322e3da61SHong Zhang   /* destroy list of free space and other temporary array(s) */
91422e3da61SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
91522e3da61SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
91622e3da61SHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
91722e3da61SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
91822e3da61SHong Zhang 
91922e3da61SHong Zhang   /* Allocate space for ca */
92022e3da61SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
92122e3da61SHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
92222e3da61SHong Zhang 
92322e3da61SHong Zhang   /* put together the new matrix */
92422e3da61SHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
92522e3da61SHong Zhang 
92622e3da61SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
92722e3da61SHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
92822e3da61SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
92922e3da61SHong Zhang   c->freedata = PETSC_TRUE;
93022e3da61SHong Zhang   c->nonew    = 0;
93122e3da61SHong Zhang 
93222e3da61SHong Zhang   /* Clean up. */
93322e3da61SHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
93422e3da61SHong Zhang 
93522e3da61SHong Zhang   PetscFunctionReturn(0);
93622e3da61SHong Zhang }
93722e3da61SHong Zhang 
938e240928fSHong Zhang /*@C
939e240928fSHong Zhang    MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
940e240928fSHong Zhang                 used by MatPtAP_MPIAIJ_MPIAIJ()
941e240928fSHong Zhang 
942e240928fSHong Zhang    Collective on Mat
943e240928fSHong Zhang 
944e240928fSHong Zhang    Input Parameters:
945e240928fSHong Zhang +  A - the matrix in mpiaij format
946e240928fSHong Zhang .  P - the projection matrix in seqaij format
947e240928fSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
948e240928fSHong Zhang .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
949e240928fSHong Zhang .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
950e240928fSHong Zhang 
951e240928fSHong Zhang    Output Parameters:
952e240928fSHong Zhang .  C - the product matrix in seqaij format
953e240928fSHong Zhang 
954e240928fSHong Zhang    Level: developer
955e240928fSHong Zhang 
956e240928fSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
957e240928fSHong Zhang @*/
9580390132cSHong Zhang static PetscEvent logkey_matptapreducedpt = 0;
95922e3da61SHong Zhang #undef __FUNCT__
9600390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ"
9610390132cSHong Zhang PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
96222e3da61SHong Zhang {
96322e3da61SHong Zhang   PetscErrorCode ierr;
96422e3da61SHong Zhang 
96522e3da61SHong Zhang   PetscFunctionBegin;
96622e3da61SHong Zhang   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
96722e3da61SHong Zhang   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
9680390132cSHong Zhang   if (!logkey_matptapreducedpt) {
9690390132cSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE);
970e462e02cSHong Zhang   }
9710390132cSHong Zhang   PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
97222e3da61SHong Zhang 
97322e3da61SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9740390132cSHong Zhang     ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
97522e3da61SHong Zhang   }
9760390132cSHong Zhang   ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr);
9770390132cSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
97822e3da61SHong Zhang 
97922e3da61SHong Zhang   PetscFunctionReturn(0);
98022e3da61SHong Zhang }
98122e3da61SHong Zhang 
982e240928fSHong Zhang /* ------------- New --------------------*/
983e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
984e240928fSHong Zhang #undef __FUNCT__
985e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
986e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
987e240928fSHong Zhang {
988e240928fSHong Zhang   PetscErrorCode ierr;
989e240928fSHong Zhang   PetscInt       flops=0;
990e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
991e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
992e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
993e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
994e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
995e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
996e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
997e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
998e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
999e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
1000e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
1001e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
1002e240928fSHong Zhang 
1003e240928fSHong Zhang   PetscFunctionBegin;
1004e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
1005e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1006e240928fSHong Zhang   }
1007e240928fSHong Zhang   PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1008e240928fSHong Zhang 
1009e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
1010e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
1011e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1012e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
1013e240928fSHong Zhang   apjdense = apj + cn;
1014e240928fSHong Zhang 
1015e240928fSHong Zhang   /* Clear old values in C */
1016e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
1017e240928fSHong Zhang 
1018e240928fSHong Zhang   for (i=0;i<am;i++) {
1019e240928fSHong Zhang     /* Form i-th sparse row of A*P */
1020e240928fSHong Zhang      apnzj = 0;
1021e240928fSHong Zhang     /* diagonal portion of A */
1022e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
1023e240928fSHong Zhang     for (j=0;j<anzi;j++) {
1024e240928fSHong Zhang       prow = *adj;
1025e240928fSHong Zhang       adj++;
1026e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1027e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1028e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
1029e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1030e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1031e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1032e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1033e240928fSHong Zhang         }
1034e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
1035e240928fSHong Zhang       }
1036e240928fSHong Zhang       flops += 2*pnzj;
1037e240928fSHong Zhang       ada++;
1038e240928fSHong Zhang     }
1039e240928fSHong Zhang     /* off-diagonal portion of A */
1040e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
1041e240928fSHong Zhang     for (j=0;j<anzi;j++) {
1042e240928fSHong Zhang       col = a->garray[*aoj];
1043e240928fSHong Zhang       if (col < cstart){
1044e240928fSHong Zhang         prow = *aoj;
1045e240928fSHong Zhang       } else if (col >= cend){
1046e240928fSHong Zhang         prow = *aoj;
1047e240928fSHong Zhang       } else {
1048e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1049e240928fSHong Zhang       }
1050e240928fSHong Zhang       aoj++;
1051e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1052e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1053e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
1054e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1055e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1056e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1057e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1058e240928fSHong Zhang         }
1059e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
1060e240928fSHong Zhang       }
1061e240928fSHong Zhang       flops += 2*pnzj;
1062e240928fSHong Zhang       aoa++;
1063e240928fSHong Zhang     }
1064e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
1065e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1066e240928fSHong Zhang 
1067e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1068e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
1069e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
1070e240928fSHong Zhang       nextap = 0;
1071e240928fSHong Zhang       crow   = *pJ++;
1072e240928fSHong Zhang       cjj    = cj + ci[crow];
1073e240928fSHong Zhang       caj    = ca + ci[crow];
1074e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
1075e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
1076e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
1077e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
1078e240928fSHong Zhang         }
1079e240928fSHong Zhang       }
1080e240928fSHong Zhang       flops += 2*apnzj;
1081e240928fSHong Zhang       pA++;
1082e240928fSHong Zhang     }
1083e240928fSHong Zhang 
1084e240928fSHong Zhang     /* Zero the current row info for A*P */
1085e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
1086e240928fSHong Zhang       apa[apj[j]]      = 0.;
1087e240928fSHong Zhang       apjdense[apj[j]] = 0;
1088e240928fSHong Zhang     }
1089e240928fSHong Zhang   }
1090e240928fSHong Zhang 
1091e240928fSHong Zhang   /* Assemble the final matrix and clean up */
1092e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1093e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1094e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
1095e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1096e240928fSHong Zhang   PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1097e240928fSHong Zhang   PetscFunctionReturn(0);
1098e240928fSHong Zhang }
1099e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
1100e240928fSHong Zhang #undef __FUNCT__
1101e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1102e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1103e240928fSHong Zhang {
1104e240928fSHong Zhang   PetscErrorCode ierr;
1105e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1106e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1107e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1108e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1109e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1110e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
1111e240928fSHong Zhang   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
1112e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1113e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
1114e240928fSHong Zhang   MatScalar      *ca;
1115e240928fSHong Zhang   PetscBT        lnkbt;
1116e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
1117e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1118e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1119e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1120e240928fSHong Zhang   /* PetscInt       rank; */
1121e240928fSHong Zhang 
1122e240928fSHong Zhang   PetscFunctionBegin;
1123e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
1124e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1125e240928fSHong Zhang   }
1126e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1127e240928fSHong Zhang 
1128e240928fSHong Zhang   /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */
1129e240928fSHong Zhang   prend = prstart + pm;
1130e240928fSHong Zhang 
1131e240928fSHong Zhang   /* get ij structure of P_loc^T */
1132e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1133e240928fSHong Zhang   ptJ=ptj;
1134e240928fSHong Zhang 
1135e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
1136e240928fSHong Zhang   /* free space for accumulating nonzero column info */
1137e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1138e240928fSHong Zhang   ci[0] = 0;
1139e240928fSHong Zhang 
1140e240928fSHong Zhang   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1141e240928fSHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1142e240928fSHong Zhang   ptasparserow_loc = ptadenserow_loc + an;
1143e240928fSHong Zhang   ptadenserow_oth  = ptasparserow_loc + an;
1144e240928fSHong Zhang   ptasparserow_oth = ptadenserow_oth + an;
1145e240928fSHong Zhang 
1146e240928fSHong Zhang   /* create and initialize a linked list */
1147e240928fSHong Zhang   nlnk = pN+1;
1148e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1149e240928fSHong Zhang 
1150e240928fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
1151e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1152e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
1153e240928fSHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
1154e240928fSHong Zhang   current_space = free_space;
1155e240928fSHong Zhang 
1156e240928fSHong Zhang   /* determine symbolic info for each row of C: */
1157e240928fSHong Zhang   for (i=0;i<pN;i++) {
1158e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
1159e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
1160e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
1161e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
1162e240928fSHong Zhang       arow = *ptJ++;
1163e240928fSHong Zhang       /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */
1164e240928fSHong Zhang       /* diagonal portion of A */
1165e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
1166e240928fSHong Zhang       ajj  = adj + adi[arow];
1167e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1168e240928fSHong Zhang         col = ajj[k]+prstart;
1169e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
1170e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
1171e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
1172e240928fSHong Zhang         }
1173e240928fSHong Zhang       }
1174e240928fSHong Zhang       /* off-diagonal portion of A */
1175e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
1176e240928fSHong Zhang       ajj  = aoj + aoi[arow];
1177e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1178e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
1179e240928fSHong Zhang         if (col < cstart){
1180e240928fSHong Zhang           col = ajj[k];
1181e240928fSHong Zhang         } else if (col >= cend){
1182e240928fSHong Zhang           col = ajj[k] + pm;
1183e240928fSHong Zhang         } else {
1184e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1185e240928fSHong Zhang         }
1186e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
1187e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
1188e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
1189e240928fSHong Zhang         }
1190e240928fSHong Zhang       }
1191e240928fSHong Zhang     }
1192e240928fSHong Zhang 
1193e240928fSHong Zhang     /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */
1194e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1195e240928fSHong Zhang     cnzi   = 0;
1196e240928fSHong Zhang     /* rows of P_loc */
1197e240928fSHong Zhang     ptaj = ptasparserow_loc;
1198e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
1199e240928fSHong Zhang       prow = *ptaj++;
1200e240928fSHong Zhang       prow -= prstart; /* rm */
1201e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1202e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1203e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1204e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1205e240928fSHong Zhang       cnzi += nlnk;
1206e240928fSHong Zhang     }
1207e240928fSHong Zhang     /* rows of P_oth */
1208e240928fSHong Zhang     ptaj = ptasparserow_oth;
1209e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
1210e240928fSHong Zhang       prow = *ptaj++;
1211e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
1212e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1213e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1214e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1215e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1216e240928fSHong Zhang       cnzi += nlnk;
1217e240928fSHong Zhang     }
1218e240928fSHong Zhang 
1219e240928fSHong Zhang     /* If free space is not available, make more free space */
1220e240928fSHong Zhang     /* Double the amount of total space in the list */
1221e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
1222e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1223e240928fSHong Zhang     }
1224e240928fSHong Zhang 
1225e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
1226e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1227e240928fSHong Zhang     current_space->array           += cnzi;
1228e240928fSHong Zhang     current_space->local_used      += cnzi;
1229e240928fSHong Zhang     current_space->local_remaining -= cnzi;
1230e240928fSHong Zhang 
1231e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
1232e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1233e240928fSHong Zhang     }
1234e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
1235e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1236e240928fSHong Zhang     }
1237e240928fSHong Zhang 
1238e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1239e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
1240e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
1241e240928fSHong Zhang   }
1242e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1243e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
1244e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
1245e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1246e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1247e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1248e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1249e240928fSHong Zhang 
1250e240928fSHong Zhang   /* Allocate space for ca */
1251e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1252e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1253e240928fSHong Zhang 
1254e240928fSHong Zhang   /* put together the new matrix */
1255e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1256e240928fSHong Zhang 
1257e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1258e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
1259e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
1260e240928fSHong Zhang   c->freedata = PETSC_TRUE;
1261e240928fSHong Zhang   c->nonew    = 0;
1262e240928fSHong Zhang 
1263e240928fSHong Zhang   /* Clean up. */
1264e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1265e240928fSHong Zhang   /*
1266e240928fSHong Zhang   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1267e240928fSHong Zhang   if (rank == 1){
1268e240928fSHong Zhang     printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N);
1269e240928fSHong Zhang     ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
1270e240928fSHong Zhang     } */
1271e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1272e240928fSHong Zhang   PetscFunctionReturn(0);
1273e240928fSHong Zhang }
1274