xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision e240928fe605233d1142eb087c23949de9c531ad)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
740390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat,Mat,MatReuse,PetscReal,PetscInt,PetscInt,Mat*);
750390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat,Mat,PetscReal,PetscInt,PetscInt,Mat*);
760390132cSHong Zhang EXTERN PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat,Mat,PetscInt,PetscInt,Mat);
7722e3da61SHong Zhang 
78*e240928fSHong Zhang 
79*e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
80*e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
81*e240928fSHong Zhang 
82eb9c0419SKris Buschelman #undef __FUNCT__
83ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
84ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
85ff134f7aSHong Zhang {
86ff134f7aSHong Zhang   PetscErrorCode    ierr;
87b90dcfe3SHong Zhang 
88b90dcfe3SHong Zhang   PetscFunctionBegin;
89b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
904c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
91b90dcfe3SHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
924c627768SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
93*e240928fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){ /* not done yet! */
944c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
95*e240928fSHong Zhang     ierr = MatDestroy(*C);CHKERRQ(ierr);
96*e240928fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);
97*e240928fSHong Zhang 
98*e240928fSHong Zhang     /*
99b90dcfe3SHong Zhang     ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
100*e240928fSHong Zhang     */
1014c627768SHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
102b90dcfe3SHong Zhang   } else {
103b90dcfe3SHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
104b90dcfe3SHong Zhang   }
105b90dcfe3SHong Zhang   PetscFunctionReturn(0);
106b90dcfe3SHong Zhang }
107*e240928fSHong Zhang #define NEW
108b90dcfe3SHong Zhang #undef __FUNCT__
109*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
110b90dcfe3SHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
111b90dcfe3SHong Zhang {
112b90dcfe3SHong Zhang   PetscErrorCode    ierr;
1130390132cSHong Zhang   Mat               P_seq,C_seq;
114*e240928fSHong Zhang   Mat               P_loc,P_oth,*ploc;
115*e240928fSHong Zhang   PetscInt          prstart,prend,m=P->m,low;
116*e240928fSHong Zhang   IS                isrow,iscol;
117ff134f7aSHong Zhang 
118ff134f7aSHong Zhang   PetscFunctionBegin;
119*e240928fSHong Zhang   /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
120*e240928fSHong Zhang   ierr = MatGetBrowsOfAoCols_Private(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_oth);CHKERRQ(ierr);
121*e240928fSHong Zhang   prend = prstart + m;
122*e240928fSHong Zhang   /* get P_loc by taking all local rows of P */
123*e240928fSHong Zhang   ierr = MatGetOwnershipRange(P,&low,PETSC_NULL);CHKERRQ(ierr);
124*e240928fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->m,low,1,&isrow);CHKERRQ(ierr);
125*e240928fSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,P->N,0,1,&iscol);CHKERRQ(ierr);
126*e240928fSHong Zhang   ierr = MatGetSubMatrices(P,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&ploc);CHKERRQ(ierr);
127*e240928fSHong Zhang   P_loc = ploc[0];
128*e240928fSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
129*e240928fSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
130*e240928fSHong Zhang 
131*e240928fSHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
132*e240928fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,fill,&C_seq);CHKERRQ(ierr);
133*e240928fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,P_loc,P_oth,prstart,C_seq);CHKERRQ(ierr);
134*e240928fSHong Zhang 
135*e240928fSHong Zhang   ierr = MatDestroyMatrices(1,&ploc);CHKERRQ(ierr);
136*e240928fSHong Zhang   ierr = MatDestroy(P_oth);CHKERRQ(ierr);
137*e240928fSHong Zhang #ifdef OLD
13825616d81SHong Zhang   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
13922e3da61SHong Zhang   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
14025616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
141ff134f7aSHong Zhang   prend  = prstart + m;
1420390132cSHong Zhang   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_INITIAL_MATRIX,fill,prstart,prend,&C_seq);CHKERRQ(ierr);
14325616d81SHong Zhang   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
144*e240928fSHong Zhang #endif
145b90dcfe3SHong Zhang   /* add C_seq into mpi C */
146b90dcfe3SHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
147b90dcfe3SHong Zhang 
148ff134f7aSHong Zhang   PetscFunctionReturn(0);
149ff134f7aSHong Zhang }
150ff134f7aSHong Zhang 
151ff134f7aSHong Zhang #undef __FUNCT__
152*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
153b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
154ff134f7aSHong Zhang {
155b90dcfe3SHong Zhang   PetscErrorCode       ierr;
1560390132cSHong Zhang   Mat                  P_seq,C_seq;
157b1d57f15SBarry Smith   PetscInt             prstart,prend,m=P->m;
158671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
159671beff6SHong Zhang   PetscObjectContainer container;
160ff134f7aSHong Zhang 
161ff134f7aSHong Zhang   PetscFunctionBegin;
162671beff6SHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
163671beff6SHong Zhang   if (container) {
1647f79fd58SMatthew Knepley     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
1657f79fd58SMatthew Knepley   } else {
1667f79fd58SMatthew Knepley     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses and object container");
167671beff6SHong Zhang   }
168671beff6SHong Zhang 
169b90dcfe3SHong Zhang   /* get P_seq = submatrix of P by taking rows of P that equal to nonzero col of A */
1700390132cSHong Zhang   ierr = MatGetBrowsOfAcols(A,P,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&prstart,&P_seq);CHKERRQ(ierr);
171ff134f7aSHong Zhang 
172b90dcfe3SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
173b90dcfe3SHong Zhang   prend = prstart + m;
174b90dcfe3SHong Zhang   C_seq = merge->C_seq;
1750390132cSHong Zhang   ierr = MatPtAPReducedPt_MPIAIJ_SeqAIJ(A,P_seq,MAT_REUSE_MATRIX,1.0,prstart,prend,&C_seq);CHKERRQ(ierr);
176b90dcfe3SHong Zhang   ierr = MatDestroy(P_seq);CHKERRQ(ierr);
177b90dcfe3SHong Zhang 
178b90dcfe3SHong Zhang   /* add C_seq into mpi C */
179b90dcfe3SHong Zhang   ierr = MatMerge_SeqsToMPI(A->comm,C_seq,P->n,P->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
180b90dcfe3SHong Zhang 
181ff134f7aSHong Zhang   PetscFunctionReturn(0);
182ff134f7aSHong Zhang }
183ff134f7aSHong Zhang 
184ff134f7aSHong Zhang #undef __FUNCT__
1859af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
186dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
1879af31e4aSHong Zhang {
188dfbe8321SBarry Smith   PetscErrorCode ierr;
189b1d57f15SBarry Smith 
1909af31e4aSHong Zhang   PetscFunctionBegin;
1919af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
192d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1939af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
194d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
1959af31e4aSHong Zhang   }
196d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1979af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
198d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
1999af31e4aSHong Zhang   PetscFunctionReturn(0);
2009af31e4aSHong Zhang }
2019af31e4aSHong Zhang 
2026849ba73SBarry Smith /*
2039af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
2044d3841fdSKris Buschelman 
2054d3841fdSKris Buschelman    Collective on Mat
2064d3841fdSKris Buschelman 
2074d3841fdSKris Buschelman    Input Parameters:
2084d3841fdSKris Buschelman +  A - the matrix
2094d3841fdSKris Buschelman -  P - the projection matrix
2104d3841fdSKris Buschelman 
2114d3841fdSKris Buschelman    Output Parameters:
2124d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
2134d3841fdSKris Buschelman 
2144d3841fdSKris Buschelman    Notes:
2154d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
2164d3841fdSKris Buschelman 
2174d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
2184d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
2199af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
2204d3841fdSKris Buschelman 
2214d3841fdSKris Buschelman    Level: intermediate
2224d3841fdSKris Buschelman 
2239af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
2246849ba73SBarry Smith */
225*e240928fSHong Zhang #undef __FUNCT__
226*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
22755a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
22855a3bba9SHong Zhang {
229dfbe8321SBarry Smith   PetscErrorCode ierr;
230534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
231534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
232eb9c0419SKris Buschelman 
233eb9c0419SKris Buschelman   PetscFunctionBegin;
234eb9c0419SKris Buschelman 
2354482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
236c9780b6fSBarry Smith   PetscValidType(A,1);
237eb9c0419SKris Buschelman   MatPreallocated(A);
238eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
239eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
240eb9c0419SKris Buschelman 
2414482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
242c9780b6fSBarry Smith   PetscValidType(P,2);
243eb9c0419SKris Buschelman   MatPreallocated(P);
244eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
245eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
246eb9c0419SKris Buschelman 
2474482741eSBarry Smith   PetscValidPointer(C,3);
2484482741eSBarry Smith 
249eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
250eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
251eb9c0419SKris Buschelman 
252534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
253534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
254534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
255534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
256534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
257534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
258534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
2594d3841fdSKris Buschelman 
260534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
261534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
262534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
263eb9c0419SKris Buschelman 
264eb9c0419SKris Buschelman   PetscFunctionReturn(0);
265eb9c0419SKris Buschelman }
266eb9c0419SKris Buschelman 
267f747e1aeSHong Zhang typedef struct {
268f747e1aeSHong Zhang   Mat    symAP;
269f747e1aeSHong Zhang } Mat_PtAPstruct;
270f747e1aeSHong Zhang 
27178a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
27278a80504SBarry Smith 
273f747e1aeSHong Zhang #undef __FUNCT__
274f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
275f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
276f747e1aeSHong Zhang {
277f4a850bbSBarry Smith   PetscErrorCode    ierr;
278f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
279f747e1aeSHong Zhang 
280f747e1aeSHong Zhang   PetscFunctionBegin;
281f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
282f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
28378a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
284f747e1aeSHong Zhang   PetscFunctionReturn(0);
285f747e1aeSHong Zhang }
286f747e1aeSHong Zhang 
287eb9c0419SKris Buschelman #undef __FUNCT__
2889af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
28955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
29055a3bba9SHong Zhang {
291dfbe8321SBarry Smith   PetscErrorCode ierr;
292d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
293d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
29455a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
295b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
29655a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
297b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
298d20bfe6fSHong Zhang   MatScalar      *ca;
29955a3bba9SHong Zhang   PetscBT        lnkbt;
300eb9c0419SKris Buschelman 
301eb9c0419SKris Buschelman   PetscFunctionBegin;
302d20bfe6fSHong Zhang   /* Get ij structure of P^T */
303eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
304d20bfe6fSHong Zhang   ptJ=ptj;
305eb9c0419SKris Buschelman 
306d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
307d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
30855a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
309d20bfe6fSHong Zhang   ci[0] = 0;
310eb9c0419SKris Buschelman 
31155a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
31255a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
313d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
31455a3bba9SHong Zhang 
31555a3bba9SHong Zhang   /* create and initialize a linked list */
31655a3bba9SHong Zhang   nlnk = pn+1;
31755a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
318eb9c0419SKris Buschelman 
319d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
320d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
321d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
322d20bfe6fSHong Zhang   current_space = free_space;
323d20bfe6fSHong Zhang 
324d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
325d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
326d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
327d20bfe6fSHong Zhang     ptanzi = 0;
328d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
329d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
330d20bfe6fSHong Zhang       arow = *ptJ++;
331d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
332d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
333d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
334d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
335d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
336d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
337d20bfe6fSHong Zhang         }
338d20bfe6fSHong Zhang       }
339d20bfe6fSHong Zhang     }
340d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
341d20bfe6fSHong Zhang     ptaj = ptasparserow;
342d20bfe6fSHong Zhang     cnzi   = 0;
343d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
344d20bfe6fSHong Zhang       prow = *ptaj++;
345d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
346d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
34755a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
34855a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
34955a3bba9SHong Zhang       cnzi += nlnk;
350d20bfe6fSHong Zhang     }
351d20bfe6fSHong Zhang 
352d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
353d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
354d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
355d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
356d20bfe6fSHong Zhang     }
357d20bfe6fSHong Zhang 
358d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
35955a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
360d20bfe6fSHong Zhang     current_space->array           += cnzi;
361d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
362d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
363d20bfe6fSHong Zhang 
364d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
365d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
366d20bfe6fSHong Zhang     }
367d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
368d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
369d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
370d20bfe6fSHong Zhang   }
371d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
372d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
373d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
37455a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
375d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
376d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
37755a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
378d20bfe6fSHong Zhang 
379d20bfe6fSHong Zhang   /* Allocate space for ca */
380d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
381d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
382d20bfe6fSHong Zhang 
383d20bfe6fSHong Zhang   /* put together the new matrix */
384d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
385d20bfe6fSHong Zhang 
386d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
387d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
388d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
389d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
390d20bfe6fSHong Zhang   c->nonew    = 0;
391d20bfe6fSHong Zhang 
392d20bfe6fSHong Zhang   /* Clean up. */
393d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
394eb9c0419SKris Buschelman 
395eb9c0419SKris Buschelman   PetscFunctionReturn(0);
396eb9c0419SKris Buschelman }
397eb9c0419SKris Buschelman 
3983985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
3993985e5eaSKris Buschelman EXTERN_C_BEGIN
4003985e5eaSKris Buschelman #undef __FUNCT__
4019af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
40255a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
40355a3bba9SHong Zhang {
4045c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
405dfbe8321SBarry Smith   PetscErrorCode ierr;
4063985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
4073985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
4083985e5eaSKris Buschelman   Mat            P=pp->AIJ;
4093985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
410b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
411b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
412b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
413b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
4143985e5eaSKris Buschelman   MatScalar      *ca;
4153985e5eaSKris Buschelman 
4163985e5eaSKris Buschelman   PetscFunctionBegin;
4173985e5eaSKris Buschelman   /* Start timer */
4189af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
4193985e5eaSKris Buschelman 
4203985e5eaSKris Buschelman   /* Get ij structure of P^T */
4213985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
4223985e5eaSKris Buschelman 
4233985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
4243985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
425b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4263985e5eaSKris Buschelman   ci[0] = 0;
4273985e5eaSKris Buschelman 
428b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
429b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
4303985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
4313985e5eaSKris Buschelman   denserow     = ptasparserow + an;
4323985e5eaSKris Buschelman   sparserow    = denserow     + pn;
4333985e5eaSKris Buschelman 
4343985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
4353985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
4363985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
4373985e5eaSKris Buschelman   current_space = free_space;
4383985e5eaSKris Buschelman 
4393985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
4403985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
4413985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
4423985e5eaSKris Buschelman     ptanzi = 0;
4433985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
4443985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
4453985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
4463985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
4473985e5eaSKris Buschelman         arow = ptJ[j] + dof;
4483985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
4493985e5eaSKris Buschelman         ajj  = aj + ai[arow];
4503985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
4513985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
4523985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
4533985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
4543985e5eaSKris Buschelman           }
4553985e5eaSKris Buschelman         }
4563985e5eaSKris Buschelman       }
4573985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
4583985e5eaSKris Buschelman       ptaj = ptasparserow;
4593985e5eaSKris Buschelman       cnzi   = 0;
4603985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
461fe05a634SKris Buschelman         pdof = *ptaj%dof;
4623985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
4633985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
4643985e5eaSKris Buschelman         pjj  = pj + pi[prow];
4653985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
466fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
467fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
468fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
4693985e5eaSKris Buschelman           }
4703985e5eaSKris Buschelman         }
4713985e5eaSKris Buschelman       }
4723985e5eaSKris Buschelman 
4733985e5eaSKris Buschelman       /* sort sparserow */
4743985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
4753985e5eaSKris Buschelman 
4763985e5eaSKris Buschelman       /* If free space is not available, make more free space */
4773985e5eaSKris Buschelman       /* Double the amount of total space in the list */
4783985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
4793985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4803985e5eaSKris Buschelman       }
4813985e5eaSKris Buschelman 
4823985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
483b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
4843985e5eaSKris Buschelman       current_space->array           += cnzi;
4853985e5eaSKris Buschelman       current_space->local_used      += cnzi;
4863985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
4873985e5eaSKris Buschelman 
4883985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
4893985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
4903985e5eaSKris Buschelman       }
4913985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
4923985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
4933985e5eaSKris Buschelman       }
4943985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
4953985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
4963985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
4973985e5eaSKris Buschelman     }
4983985e5eaSKris Buschelman   }
4993985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
5003985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
5013985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
502b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5033985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
5043985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
5053985e5eaSKris Buschelman 
5063985e5eaSKris Buschelman   /* Allocate space for ca */
5073985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
5083985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
5093985e5eaSKris Buschelman 
5103985e5eaSKris Buschelman   /* put together the new matrix */
5113985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
5123985e5eaSKris Buschelman 
5133985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5143985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
5153985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
5163985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
5173985e5eaSKris Buschelman   c->nonew    = 0;
5183985e5eaSKris Buschelman 
5193985e5eaSKris Buschelman   /* Clean up. */
5203985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
5213985e5eaSKris Buschelman 
5229af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
5233985e5eaSKris Buschelman   PetscFunctionReturn(0);
5243985e5eaSKris Buschelman }
5253985e5eaSKris Buschelman EXTERN_C_END
5263985e5eaSKris Buschelman 
5276849ba73SBarry Smith /*
5289af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
5294d3841fdSKris Buschelman 
5304d3841fdSKris Buschelman    Collective on Mat
5314d3841fdSKris Buschelman 
5324d3841fdSKris Buschelman    Input Parameters:
5334d3841fdSKris Buschelman +  A - the matrix
5344d3841fdSKris Buschelman -  P - the projection matrix
5354d3841fdSKris Buschelman 
5364d3841fdSKris Buschelman    Output Parameters:
5374d3841fdSKris Buschelman .  C - the product matrix
5384d3841fdSKris Buschelman 
5394d3841fdSKris Buschelman    Notes:
5409af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
5414d3841fdSKris Buschelman    the user using MatDeatroy().
5424d3841fdSKris Buschelman 
543170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
544170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
5454d3841fdSKris Buschelman 
5464d3841fdSKris Buschelman    Level: intermediate
5474d3841fdSKris Buschelman 
5489af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
5496849ba73SBarry Smith */
550*e240928fSHong Zhang #undef __FUNCT__
551*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
55255a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
55355a3bba9SHong Zhang {
554dfbe8321SBarry Smith   PetscErrorCode ierr;
555534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
556534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
557eb9c0419SKris Buschelman 
558eb9c0419SKris Buschelman   PetscFunctionBegin;
559eb9c0419SKris Buschelman 
5604482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
561c9780b6fSBarry Smith   PetscValidType(A,1);
562eb9c0419SKris Buschelman   MatPreallocated(A);
563eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
564eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
565eb9c0419SKris Buschelman 
5664482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
567c9780b6fSBarry Smith   PetscValidType(P,2);
568eb9c0419SKris Buschelman   MatPreallocated(P);
569eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
570eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
571eb9c0419SKris Buschelman 
5724482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
573c9780b6fSBarry Smith   PetscValidType(C,3);
574eb9c0419SKris Buschelman   MatPreallocated(C);
575eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
576eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
577eb9c0419SKris Buschelman 
578eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
579eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
580eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
581eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
582eb9c0419SKris Buschelman 
583534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
584534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
585534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
586534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
587534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
588534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
589534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5904d3841fdSKris Buschelman 
591534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
592534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
593534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
594eb9c0419SKris Buschelman 
595eb9c0419SKris Buschelman   PetscFunctionReturn(0);
596eb9c0419SKris Buschelman }
597eb9c0419SKris Buschelman 
598eb9c0419SKris Buschelman #undef __FUNCT__
5999af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
600dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
601dfbe8321SBarry Smith {
602dfbe8321SBarry Smith   PetscErrorCode ierr;
603b1d57f15SBarry Smith   PetscInt       flops=0;
604d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
605d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
606d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
607b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
608b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
609b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
610b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
611d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
612eb9c0419SKris Buschelman 
613eb9c0419SKris Buschelman   PetscFunctionBegin;
614d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
615b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
616b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
617eb9c0419SKris Buschelman 
618b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
619d20bfe6fSHong Zhang   apjdense = apj + cn;
620d20bfe6fSHong Zhang 
621d20bfe6fSHong Zhang   /* Clear old values in C */
622d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
623d20bfe6fSHong Zhang 
624d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
625d20bfe6fSHong Zhang     /* Form sparse row of A*P */
626d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
627d20bfe6fSHong Zhang     apnzj = 0;
628d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
629d20bfe6fSHong Zhang       prow = *aj++;
630d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
631d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
632d20bfe6fSHong Zhang       paj  = pa + pi[prow];
633d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
634d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
635d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
636d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
637d20bfe6fSHong Zhang         }
638d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
639d20bfe6fSHong Zhang       }
640d20bfe6fSHong Zhang       flops += 2*pnzj;
641d20bfe6fSHong Zhang       aa++;
642d20bfe6fSHong Zhang     }
643d20bfe6fSHong Zhang 
644d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
645d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
646d20bfe6fSHong Zhang 
647d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
648d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
649d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
650d20bfe6fSHong Zhang       nextap = 0;
651d20bfe6fSHong Zhang       crow   = *pJ++;
652d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
653d20bfe6fSHong Zhang       caj    = ca + ci[crow];
654d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
655d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
656d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
657d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
658d20bfe6fSHong Zhang         }
659d20bfe6fSHong Zhang       }
660d20bfe6fSHong Zhang       flops += 2*apnzj;
661d20bfe6fSHong Zhang       pA++;
662d20bfe6fSHong Zhang     }
663d20bfe6fSHong Zhang 
664d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
665d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
666d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
667d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
668d20bfe6fSHong Zhang     }
669d20bfe6fSHong Zhang   }
670d20bfe6fSHong Zhang 
671d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
672d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
673d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
674d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
675d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
676d20bfe6fSHong Zhang 
677eb9c0419SKris Buschelman   PetscFunctionReturn(0);
678eb9c0419SKris Buschelman }
6790e36024fSHong Zhang 
6800390132cSHong Zhang /* Compute local C = P[rstart:rend,:]^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
6810e36024fSHong Zhang 
6820e36024fSHong Zhang #undef __FUNCT__
6830390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ"
6840390132cSHong Zhang PetscErrorCode MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscInt prstart,PetscInt prend,Mat C)
68522e3da61SHong Zhang {
68622e3da61SHong Zhang   PetscErrorCode ierr;
68722e3da61SHong Zhang   PetscInt       flops=0;
68822e3da61SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
68922e3da61SHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
69022e3da61SHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
69122e3da61SHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
69222e3da61SHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
69322e3da61SHong Zhang   PetscInt       *pi=p->i,*pj=p->j,*pJ=p->j+pi[prstart],*pjj;
69422e3da61SHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
69522e3da61SHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
69622e3da61SHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
69722e3da61SHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
69822e3da61SHong Zhang 
69922e3da61SHong Zhang   PetscFunctionBegin;
70022e3da61SHong Zhang   pA=p->a+pi[prstart];
70122e3da61SHong Zhang   /* Allocate temporary array for storage of one row of A*P */
70222e3da61SHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
70322e3da61SHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
70422e3da61SHong Zhang 
70522e3da61SHong Zhang   apj      = (PetscInt *)(apa + cn);
70622e3da61SHong Zhang   apjdense = apj + cn;
70722e3da61SHong Zhang 
70822e3da61SHong Zhang   /* Clear old values in C */
70922e3da61SHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
71022e3da61SHong Zhang 
71122e3da61SHong Zhang   for (i=0;i<am;i++) {
71222e3da61SHong Zhang     /* Form sparse row of A*P */
71322e3da61SHong Zhang     /* diagonal portion of A */
71422e3da61SHong Zhang     anzi  = adi[i+1] - adi[i];
71522e3da61SHong Zhang     apnzj = 0;
71622e3da61SHong Zhang     for (j=0;j<anzi;j++) {
71722e3da61SHong Zhang       prow = *adj + prstart;
71822e3da61SHong Zhang       adj++;
71922e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
72022e3da61SHong Zhang       pjj  = pj + pi[prow];
72122e3da61SHong Zhang       paj  = pa + pi[prow];
72222e3da61SHong Zhang       for (k=0;k<pnzj;k++) {
72322e3da61SHong Zhang         if (!apjdense[pjj[k]]) {
72422e3da61SHong Zhang           apjdense[pjj[k]] = -1;
72522e3da61SHong Zhang           apj[apnzj++]     = pjj[k];
72622e3da61SHong Zhang         }
72722e3da61SHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
72822e3da61SHong Zhang       }
72922e3da61SHong Zhang       flops += 2*pnzj;
73022e3da61SHong Zhang       ada++;
73122e3da61SHong Zhang     }
73222e3da61SHong Zhang     /* off-diagonal portion of A */
73322e3da61SHong Zhang     anzi  = aoi[i+1] - aoi[i];
73422e3da61SHong Zhang     for (j=0;j<anzi;j++) {
73522e3da61SHong Zhang       col = a->garray[*aoj];
73622e3da61SHong Zhang       if (col < cstart){
73722e3da61SHong Zhang         prow = *aoj;
73822e3da61SHong Zhang       } else if (col >= cend){
73922e3da61SHong Zhang         prow = *aoj + prend-prstart;
74022e3da61SHong Zhang       } else {
74122e3da61SHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
74222e3da61SHong Zhang       }
74322e3da61SHong Zhang       aoj++;
74422e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
74522e3da61SHong Zhang       pjj  = pj + pi[prow];
74622e3da61SHong Zhang       paj  = pa + pi[prow];
74722e3da61SHong Zhang       for (k=0;k<pnzj;k++) {
74822e3da61SHong Zhang         if (!apjdense[pjj[k]]) {
74922e3da61SHong Zhang           apjdense[pjj[k]] = -1;
75022e3da61SHong Zhang           apj[apnzj++]     = pjj[k];
75122e3da61SHong Zhang         }
75222e3da61SHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
75322e3da61SHong Zhang       }
75422e3da61SHong Zhang       flops += 2*pnzj;
75522e3da61SHong Zhang       aoa++;
75622e3da61SHong Zhang     }
75722e3da61SHong Zhang 
75822e3da61SHong Zhang     /* Sort the j index array for quick sparse axpy. */
75922e3da61SHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
76022e3da61SHong Zhang 
76122e3da61SHong Zhang     /* Compute P[[prstart:prend,:]^T*A*P using outer product (P^T)[:,j+prstart]*(A*P)[j,:]. */
76222e3da61SHong Zhang     pnzi = pi[i+1+prstart] - pi[i+prstart];
76322e3da61SHong Zhang     for (j=0;j<pnzi;j++) {
76422e3da61SHong Zhang       nextap = 0;
76522e3da61SHong Zhang       crow   = *pJ++;
76622e3da61SHong Zhang       cjj    = cj + ci[crow];
76722e3da61SHong Zhang       caj    = ca + ci[crow];
76822e3da61SHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
76922e3da61SHong Zhang       for (k=0;nextap<apnzj;k++) {
77022e3da61SHong Zhang         if (cjj[k]==apj[nextap]) {
77122e3da61SHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
77222e3da61SHong Zhang         }
77322e3da61SHong Zhang       }
77422e3da61SHong Zhang       flops += 2*apnzj;
77522e3da61SHong Zhang       pA++;
77622e3da61SHong Zhang     }
77722e3da61SHong Zhang 
77822e3da61SHong Zhang     /* Zero the current row info for A*P */
77922e3da61SHong Zhang     for (j=0;j<apnzj;j++) {
78022e3da61SHong Zhang       apa[apj[j]]      = 0.;
78122e3da61SHong Zhang       apjdense[apj[j]] = 0;
78222e3da61SHong Zhang     }
78322e3da61SHong Zhang   }
78422e3da61SHong Zhang 
78522e3da61SHong Zhang   /* Assemble the final matrix and clean up */
78622e3da61SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
78722e3da61SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
78822e3da61SHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
78922e3da61SHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
79022e3da61SHong Zhang 
79122e3da61SHong Zhang   PetscFunctionReturn(0);
79222e3da61SHong Zhang }
79322e3da61SHong Zhang 
79422e3da61SHong Zhang #undef __FUNCT__
7950390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ"
7960390132cSHong Zhang PetscErrorCode MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
79722e3da61SHong Zhang {
79822e3da61SHong Zhang   PetscErrorCode ierr;
79922e3da61SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
80022e3da61SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
80122e3da61SHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
80222e3da61SHong Zhang   Mat_SeqAIJ     *p=(Mat_SeqAIJ*)P->data,*c;
80322e3da61SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pi=p->i,*pj=p->j,*pjj;
80422e3da61SHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
80522e3da61SHong Zhang   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
80622e3da61SHong Zhang   PetscInt       an=A->N,am=A->m,pn=P->N,pm=P->M;
80722e3da61SHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
80822e3da61SHong Zhang   PetscInt       m=prend-prstart,nlnk,*lnk;
80922e3da61SHong Zhang   MatScalar      *ca;
81022e3da61SHong Zhang   PetscBT        lnkbt;
81122e3da61SHong Zhang 
81222e3da61SHong Zhang   PetscFunctionBegin;
813*e240928fSHong Zhang   int rank;
814*e240928fSHong Zhang   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
81522e3da61SHong Zhang 
81622e3da61SHong Zhang   /* Get ij structure of P[rstart:rend,:]^T */
817e462e02cSHong Zhang   ierr = MatGetSymbolicTransposeReduced_SeqAIJ(P,prstart,prend,&pti,&ptj);CHKERRQ(ierr);
81822e3da61SHong Zhang   ptJ=ptj;
81922e3da61SHong Zhang 
82022e3da61SHong Zhang   /* Allocate ci array, arrays for fill computation and */
82122e3da61SHong Zhang   /* free space for accumulating nonzero column info */
82222e3da61SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
82322e3da61SHong Zhang   ci[0] = 0;
82422e3da61SHong Zhang 
82522e3da61SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
82622e3da61SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
82722e3da61SHong Zhang   ptasparserow = ptadenserow  + an;
82822e3da61SHong Zhang 
82922e3da61SHong Zhang   /* create and initialize a linked list */
83022e3da61SHong Zhang   nlnk = pn+1;
83122e3da61SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
83222e3da61SHong Zhang 
83322e3da61SHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
83422e3da61SHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
83522e3da61SHong Zhang   nnz           = adi[am] + aoi[am];
83622e3da61SHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/pm)*pn,&free_space);
83722e3da61SHong Zhang   current_space = free_space;
83822e3da61SHong Zhang 
83922e3da61SHong Zhang   /* Determine symbolic info for each row of C: */
84022e3da61SHong Zhang   for (i=0;i<pn;i++) {
84122e3da61SHong Zhang     ptnzi  = pti[i+1] - pti[i];
84222e3da61SHong Zhang     ptanzi = 0;
84322e3da61SHong Zhang     /* Determine symbolic row of PtA_reduced: */
84422e3da61SHong Zhang     for (j=0;j<ptnzi;j++) {
84522e3da61SHong Zhang       arow = *ptJ++;
846*e240928fSHong Zhang       if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow);
84722e3da61SHong Zhang       /* diagonal portion of A */
84822e3da61SHong Zhang       anzj = adi[arow+1] - adi[arow];
84922e3da61SHong Zhang       ajj  = adj + adi[arow];
85022e3da61SHong Zhang       for (k=0;k<anzj;k++) {
85122e3da61SHong Zhang         col = ajj[k]+prstart;
85222e3da61SHong Zhang         if (!ptadenserow[col]) {
85322e3da61SHong Zhang           ptadenserow[col]    = -1;
85422e3da61SHong Zhang           ptasparserow[ptanzi++] = col;
85522e3da61SHong Zhang         }
85622e3da61SHong Zhang       }
85722e3da61SHong Zhang       /* off-diagonal portion of A */
85822e3da61SHong Zhang       anzj = aoi[arow+1] - aoi[arow];
85922e3da61SHong Zhang       ajj  = aoj + aoi[arow];
86022e3da61SHong Zhang       for (k=0;k<anzj;k++) {
86122e3da61SHong Zhang         col = a->garray[ajj[k]];  /* global col */
86222e3da61SHong Zhang         if (col < cstart){
86322e3da61SHong Zhang           col = ajj[k];
86422e3da61SHong Zhang         } else if (col >= cend){
86522e3da61SHong Zhang           col = ajj[k] + m;
86622e3da61SHong Zhang         } else {
86722e3da61SHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
86822e3da61SHong Zhang         }
86922e3da61SHong Zhang         if (!ptadenserow[col]) {
87022e3da61SHong Zhang           ptadenserow[col]    = -1;
87122e3da61SHong Zhang           ptasparserow[ptanzi++] = col;
87222e3da61SHong Zhang         }
87322e3da61SHong Zhang       }
87422e3da61SHong Zhang     }
875*e240928fSHong Zhang 
876*e240928fSHong Zhang     printf(" [%d] i: %d, ptanzi: %d \n",rank,i,ptanzi);
87722e3da61SHong Zhang     /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
87822e3da61SHong Zhang     ptaj = ptasparserow;
87922e3da61SHong Zhang     cnzi   = 0;
88022e3da61SHong Zhang     for (j=0;j<ptanzi;j++) {
88122e3da61SHong Zhang       prow = *ptaj++;
88222e3da61SHong Zhang       pnzj = pi[prow+1] - pi[prow];
88322e3da61SHong Zhang       pjj  = pj + pi[prow];
88422e3da61SHong Zhang       /* if (rank==1) printf(" prow: %d, pnzj: %d\n",prow,pnzj); */
88522e3da61SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
88622e3da61SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
88722e3da61SHong Zhang       cnzi += nlnk;
88822e3da61SHong Zhang     }
88922e3da61SHong Zhang 
89022e3da61SHong Zhang     /* If free space is not available, make more free space */
89122e3da61SHong Zhang     /* Double the amount of total space in the list */
89222e3da61SHong Zhang     if (current_space->local_remaining<cnzi) {
89322e3da61SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
89422e3da61SHong Zhang     }
89522e3da61SHong Zhang 
89622e3da61SHong Zhang     /* Copy data into free space, and zero out denserows */
89722e3da61SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
89822e3da61SHong Zhang     current_space->array           += cnzi;
89922e3da61SHong Zhang     current_space->local_used      += cnzi;
90022e3da61SHong Zhang     current_space->local_remaining -= cnzi;
90122e3da61SHong Zhang 
90222e3da61SHong Zhang     for (j=0;j<ptanzi;j++) {
90322e3da61SHong Zhang       ptadenserow[ptasparserow[j]] = 0;
90422e3da61SHong Zhang     }
90522e3da61SHong Zhang 
90622e3da61SHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
90722e3da61SHong Zhang     /*        For now, we will recompute what is needed. */
90822e3da61SHong Zhang     ci[i+1] = ci[i] + cnzi;
90922e3da61SHong Zhang   }
91022e3da61SHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
91122e3da61SHong Zhang   /* Allocate space for cj, initialize cj, and */
91222e3da61SHong Zhang   /* destroy list of free space and other temporary array(s) */
91322e3da61SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
91422e3da61SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
91522e3da61SHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
91622e3da61SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
91722e3da61SHong Zhang 
91822e3da61SHong Zhang   /* Allocate space for ca */
91922e3da61SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
92022e3da61SHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
92122e3da61SHong Zhang 
92222e3da61SHong Zhang   /* put together the new matrix */
92322e3da61SHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
92422e3da61SHong Zhang 
92522e3da61SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
92622e3da61SHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
92722e3da61SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
92822e3da61SHong Zhang   c->freedata = PETSC_TRUE;
92922e3da61SHong Zhang   c->nonew    = 0;
93022e3da61SHong Zhang 
93122e3da61SHong Zhang   /* Clean up. */
93222e3da61SHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
93322e3da61SHong Zhang 
93422e3da61SHong Zhang   PetscFunctionReturn(0);
93522e3da61SHong Zhang }
93622e3da61SHong Zhang 
937*e240928fSHong Zhang /*@C
938*e240928fSHong Zhang    MatPtAPReducedPt - Creates C = P[rstart:rend,:]^T * A * P of seqaij matrices,
939*e240928fSHong Zhang                 used by MatPtAP_MPIAIJ_MPIAIJ()
940*e240928fSHong Zhang 
941*e240928fSHong Zhang    Collective on Mat
942*e240928fSHong Zhang 
943*e240928fSHong Zhang    Input Parameters:
944*e240928fSHong Zhang +  A - the matrix in mpiaij format
945*e240928fSHong Zhang .  P - the projection matrix in seqaij format
946*e240928fSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
947*e240928fSHong Zhang .  fill - expected fill (not being used when scall==MAT_REUSE_MATRIX)
948*e240928fSHong Zhang .  prstart, prend - the starting and ending-1-th row of the matrix P to be used for transpose
949*e240928fSHong Zhang 
950*e240928fSHong Zhang    Output Parameters:
951*e240928fSHong Zhang .  C - the product matrix in seqaij format
952*e240928fSHong Zhang 
953*e240928fSHong Zhang    Level: developer
954*e240928fSHong Zhang 
955*e240928fSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
956*e240928fSHong Zhang @*/
9570390132cSHong Zhang static PetscEvent logkey_matptapreducedpt = 0;
95822e3da61SHong Zhang #undef __FUNCT__
9590390132cSHong Zhang #define __FUNCT__ "MatPtAPReducedPt_MPIAIJ_SeqAIJ"
9600390132cSHong Zhang PetscErrorCode MatPtAPReducedPt_MPIAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,PetscInt prstart,PetscInt prend,Mat *C)
96122e3da61SHong Zhang {
96222e3da61SHong Zhang   PetscErrorCode ierr;
96322e3da61SHong Zhang 
96422e3da61SHong Zhang   PetscFunctionBegin;
96522e3da61SHong Zhang   if (A->m != prend-prstart) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->m,prend-prstart);
96622e3da61SHong Zhang   if (prend-prstart > P->m) SETERRQ2(PETSC_ERR_ARG_SIZ," prend-prstart %d cannot be larger than P->m %d",prend-prstart,P->m);
9670390132cSHong Zhang   if (!logkey_matptapreducedpt) {
9680390132cSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapreducedpt,"MatPtAP_ReducedPt",MAT_COOKIE);
969e462e02cSHong Zhang   }
9700390132cSHong Zhang   PetscLogEventBegin(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
97122e3da61SHong Zhang 
97222e3da61SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
9730390132cSHong Zhang     ierr = MatPtAPReducedPtSymbolic_MPIAIJ_SeqAIJ(A,P,fill,prstart,prend,C);CHKERRQ(ierr);
97422e3da61SHong Zhang   }
9750390132cSHong Zhang   ierr = MatPtAPReducedPtNumeric_MPIAIJ_SeqAIJ(A,P,prstart,prend,*C);CHKERRQ(ierr);
9760390132cSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapreducedpt,A,P,0,0);CHKERRQ(ierr);
97722e3da61SHong Zhang 
97822e3da61SHong Zhang   PetscFunctionReturn(0);
97922e3da61SHong Zhang }
98022e3da61SHong Zhang 
981*e240928fSHong Zhang /*@C
982*e240928fSHong Zhang     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
983*e240928fSHong Zhang      of the off-diagonal portion of A
984*e240928fSHong Zhang 
985*e240928fSHong Zhang     Collective on Mat
986*e240928fSHong Zhang 
987*e240928fSHong Zhang    Input Parameters:
988*e240928fSHong Zhang +    A,B - the matrices in mpiaij format
989*e240928fSHong Zhang .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
990*e240928fSHong Zhang -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
991*e240928fSHong Zhang 
992*e240928fSHong Zhang    Output Parameter:
993*e240928fSHong Zhang +    rowb, colb - index sets of rows and columns of B to extract
994*e240928fSHong Zhang .    brstart - row index of B_seq from which next B->m rows are taken from B's local rows
995*e240928fSHong Zhang -    B_seq - the sequential matrix generated
996*e240928fSHong Zhang 
997*e240928fSHong Zhang     Level: developer
998*e240928fSHong Zhang 
999*e240928fSHong Zhang @*/
1000*e240928fSHong Zhang static PetscEvent logkey_GetBrowsOfAocols = 0;
1001*e240928fSHong Zhang #undef __FUNCT__
1002*e240928fSHong Zhang #define __FUNCT__ "MatGetBrowsOfAoCols_Private"
1003*e240928fSHong Zhang PetscErrorCode MatGetBrowsOfAoCols_Private(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
1004*e240928fSHong Zhang {
1005*e240928fSHong Zhang   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
1006*e240928fSHong Zhang   PetscErrorCode    ierr;
1007*e240928fSHong Zhang   PetscInt          *idx,i,start,ncols,nzB,*cmap,imark;
1008*e240928fSHong Zhang   IS                isrowb,iscolb;
1009*e240928fSHong Zhang   Mat               *bseq;
1010*e240928fSHong Zhang 
1011*e240928fSHong Zhang   PetscFunctionBegin;
1012*e240928fSHong Zhang   if (a->cstart != b->rstart || a->cend != b->rend){
1013*e240928fSHong Zhang     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
1014*e240928fSHong Zhang   }
1015*e240928fSHong Zhang   if (!logkey_GetBrowsOfAocols) {
1016*e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE);
1017*e240928fSHong Zhang   }
1018*e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
1019*e240928fSHong Zhang 
1020*e240928fSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1021*e240928fSHong Zhang     start = a->cstart;
1022*e240928fSHong Zhang     cmap  = a->garray;
1023*e240928fSHong Zhang     nzB   = a->B->n;
1024*e240928fSHong Zhang     if (!nzB){ /* output a null matrix */
1025*e240928fSHong Zhang       B_seq = PETSC_NULL;
1026*e240928fSHong Zhang       ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
1027*e240928fSHong Zhang       PetscFunctionReturn(0);
1028*e240928fSHong Zhang     }
1029*e240928fSHong Zhang     ierr  = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr);
1030*e240928fSHong Zhang     ncols = 0;
1031*e240928fSHong Zhang     for (i=0; i<nzB; i++) {  /* row < local row index */
1032*e240928fSHong Zhang       if (cmap[i] < start) idx[ncols++] = cmap[i];
1033*e240928fSHong Zhang       else break;
1034*e240928fSHong Zhang     }
1035*e240928fSHong Zhang     imark = i;
1036*e240928fSHong Zhang     /* for (i=0; i<nzA; i++) idx[ncols++] = start + i; */ /* local rows */
1037*e240928fSHong Zhang     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
1038*e240928fSHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
1039*e240928fSHong Zhang     ierr = PetscFree(idx);CHKERRQ(ierr);
1040*e240928fSHong Zhang     *brstart = imark;
1041*e240928fSHong Zhang     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
1042*e240928fSHong Zhang   } else {
1043*e240928fSHong Zhang     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
1044*e240928fSHong Zhang     isrowb = *rowb; iscolb = *colb;
1045*e240928fSHong Zhang     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
1046*e240928fSHong Zhang     bseq[0] = *B_seq;
1047*e240928fSHong Zhang   }
1048*e240928fSHong Zhang   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
1049*e240928fSHong Zhang   *B_seq = bseq[0];
1050*e240928fSHong Zhang   ierr = PetscFree(bseq);CHKERRQ(ierr);
1051*e240928fSHong Zhang   if (!rowb){
1052*e240928fSHong Zhang     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
1053*e240928fSHong Zhang   } else {
1054*e240928fSHong Zhang     *rowb = isrowb;
1055*e240928fSHong Zhang   }
1056*e240928fSHong Zhang   if (!colb){
1057*e240928fSHong Zhang     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
1058*e240928fSHong Zhang   } else {
1059*e240928fSHong Zhang     *colb = iscolb;
1060*e240928fSHong Zhang   }
1061*e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
1062*e240928fSHong Zhang   PetscFunctionReturn(0);
1063*e240928fSHong Zhang }
1064*e240928fSHong Zhang 
1065*e240928fSHong Zhang /* ------------- New --------------------*/
1066*e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
1067*e240928fSHong Zhang #undef __FUNCT__
1068*e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
1069*e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
1070*e240928fSHong Zhang {
1071*e240928fSHong Zhang   PetscErrorCode ierr;
1072*e240928fSHong Zhang   PetscInt       flops=0;
1073*e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1074*e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
1075*e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
1076*e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1077*e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1078*e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
1079*e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
1080*e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
1081*e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
1082*e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
1083*e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
1084*e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
1085*e240928fSHong Zhang 
1086*e240928fSHong Zhang   PetscFunctionBegin;
1087*e240928fSHong Zhang     if (!logkey_matptapnumeric_local) {
1088*e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1089*e240928fSHong Zhang   }
1090*e240928fSHong Zhang   PetscLogEventBegin(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1091*e240928fSHong Zhang 
1092*e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
1093*e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
1094*e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
1095*e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
1096*e240928fSHong Zhang   apjdense = apj + cn;
1097*e240928fSHong Zhang 
1098*e240928fSHong Zhang   /* Clear old values in C */
1099*e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
1100*e240928fSHong Zhang 
1101*e240928fSHong Zhang   for (i=0;i<am;i++) {
1102*e240928fSHong Zhang     /* Form i-th sparse row of A*P */
1103*e240928fSHong Zhang      apnzj = 0;
1104*e240928fSHong Zhang     /* diagonal portion of A */
1105*e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
1106*e240928fSHong Zhang     for (j=0;j<anzi;j++) {
1107*e240928fSHong Zhang       prow = *adj;
1108*e240928fSHong Zhang       adj++;
1109*e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1110*e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1111*e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
1112*e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1113*e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1114*e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1115*e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1116*e240928fSHong Zhang         }
1117*e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
1118*e240928fSHong Zhang       }
1119*e240928fSHong Zhang       flops += 2*pnzj;
1120*e240928fSHong Zhang       ada++;
1121*e240928fSHong Zhang     }
1122*e240928fSHong Zhang     /* off-diagonal portion of A */
1123*e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
1124*e240928fSHong Zhang     for (j=0;j<anzi;j++) {
1125*e240928fSHong Zhang       col = a->garray[*aoj];
1126*e240928fSHong Zhang       if (col < cstart){
1127*e240928fSHong Zhang         prow = *aoj;
1128*e240928fSHong Zhang       } else if (col >= cend){
1129*e240928fSHong Zhang         prow = *aoj;
1130*e240928fSHong Zhang       } else {
1131*e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1132*e240928fSHong Zhang       }
1133*e240928fSHong Zhang       aoj++;
1134*e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1135*e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1136*e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
1137*e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1138*e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1139*e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1140*e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1141*e240928fSHong Zhang         }
1142*e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
1143*e240928fSHong Zhang       }
1144*e240928fSHong Zhang       flops += 2*pnzj;
1145*e240928fSHong Zhang       aoa++;
1146*e240928fSHong Zhang     }
1147*e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
1148*e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1149*e240928fSHong Zhang 
1150*e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1151*e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
1152*e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
1153*e240928fSHong Zhang       nextap = 0;
1154*e240928fSHong Zhang       crow   = *pJ++;
1155*e240928fSHong Zhang       cjj    = cj + ci[crow];
1156*e240928fSHong Zhang       caj    = ca + ci[crow];
1157*e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
1158*e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
1159*e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
1160*e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
1161*e240928fSHong Zhang         }
1162*e240928fSHong Zhang       }
1163*e240928fSHong Zhang       flops += 2*apnzj;
1164*e240928fSHong Zhang       pA++;
1165*e240928fSHong Zhang     }
1166*e240928fSHong Zhang 
1167*e240928fSHong Zhang     /* Zero the current row info for A*P */
1168*e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
1169*e240928fSHong Zhang       apa[apj[j]]      = 0.;
1170*e240928fSHong Zhang       apjdense[apj[j]] = 0;
1171*e240928fSHong Zhang     }
1172*e240928fSHong Zhang   }
1173*e240928fSHong Zhang 
1174*e240928fSHong Zhang   /* Assemble the final matrix and clean up */
1175*e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176*e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1177*e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
1178*e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1179*e240928fSHong Zhang   PetscLogEventEnd(logkey_matptapnumeric_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1180*e240928fSHong Zhang   PetscFunctionReturn(0);
1181*e240928fSHong Zhang }
1182*e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
1183*e240928fSHong Zhang #undef __FUNCT__
1184*e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1185*e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1186*e240928fSHong Zhang {
1187*e240928fSHong Zhang   PetscErrorCode ierr;
1188*e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1189*e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1190*e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1191*e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1192*e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1193*e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
1194*e240928fSHong Zhang   PetscInt       an=A->N,am=A->m,pN=P_loc->N;
1195*e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1196*e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
1197*e240928fSHong Zhang   MatScalar      *ca;
1198*e240928fSHong Zhang   PetscBT        lnkbt;
1199*e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
1200*e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1201*e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1202*e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1203*e240928fSHong Zhang   /* PetscInt       rank; */
1204*e240928fSHong Zhang 
1205*e240928fSHong Zhang   PetscFunctionBegin;
1206*e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
1207*e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1208*e240928fSHong Zhang   }
1209*e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1210*e240928fSHong Zhang 
1211*e240928fSHong Zhang   /* ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); */
1212*e240928fSHong Zhang   prend = prstart + pm;
1213*e240928fSHong Zhang 
1214*e240928fSHong Zhang   /* get ij structure of P_loc^T */
1215*e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1216*e240928fSHong Zhang   ptJ=ptj;
1217*e240928fSHong Zhang 
1218*e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
1219*e240928fSHong Zhang   /* free space for accumulating nonzero column info */
1220*e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1221*e240928fSHong Zhang   ci[0] = 0;
1222*e240928fSHong Zhang 
1223*e240928fSHong Zhang   ierr = PetscMalloc((4*an+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1224*e240928fSHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
1225*e240928fSHong Zhang   ptasparserow_loc = ptadenserow_loc + an;
1226*e240928fSHong Zhang   ptadenserow_oth  = ptasparserow_loc + an;
1227*e240928fSHong Zhang   ptasparserow_oth = ptadenserow_oth + an;
1228*e240928fSHong Zhang 
1229*e240928fSHong Zhang   /* create and initialize a linked list */
1230*e240928fSHong Zhang   nlnk = pN+1;
1231*e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1232*e240928fSHong Zhang 
1233*e240928fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P (P->M=A->N). */
1234*e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1235*e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
1236*e240928fSHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz/A->N)*pN,&free_space);
1237*e240928fSHong Zhang   current_space = free_space;
1238*e240928fSHong Zhang 
1239*e240928fSHong Zhang   /* determine symbolic info for each row of C: */
1240*e240928fSHong Zhang   for (i=0;i<pN;i++) {
1241*e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
1242*e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
1243*e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
1244*e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
1245*e240928fSHong Zhang       arow = *ptJ++;
1246*e240928fSHong Zhang       /* if (rank==1) printf("i: %d, ptnzi: %d, arow: %d\n",i,ptnzi,arow); */
1247*e240928fSHong Zhang       /* diagonal portion of A */
1248*e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
1249*e240928fSHong Zhang       ajj  = adj + adi[arow];
1250*e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1251*e240928fSHong Zhang         col = ajj[k]+prstart;
1252*e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
1253*e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
1254*e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
1255*e240928fSHong Zhang         }
1256*e240928fSHong Zhang       }
1257*e240928fSHong Zhang       /* off-diagonal portion of A */
1258*e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
1259*e240928fSHong Zhang       ajj  = aoj + aoi[arow];
1260*e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1261*e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
1262*e240928fSHong Zhang         if (col < cstart){
1263*e240928fSHong Zhang           col = ajj[k];
1264*e240928fSHong Zhang         } else if (col >= cend){
1265*e240928fSHong Zhang           col = ajj[k] + pm;
1266*e240928fSHong Zhang         } else {
1267*e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1268*e240928fSHong Zhang         }
1269*e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
1270*e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
1271*e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
1272*e240928fSHong Zhang         }
1273*e240928fSHong Zhang       }
1274*e240928fSHong Zhang     }
1275*e240928fSHong Zhang 
1276*e240928fSHong Zhang     /* printf(" [%d] i: %d, nprow_loc: %d, nprow_oth: %d\n",rank,i,nprow_loc,nprow_oth); */
1277*e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1278*e240928fSHong Zhang     cnzi   = 0;
1279*e240928fSHong Zhang     /* rows of P_loc */
1280*e240928fSHong Zhang     ptaj = ptasparserow_loc;
1281*e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
1282*e240928fSHong Zhang       prow = *ptaj++;
1283*e240928fSHong Zhang       prow -= prstart; /* rm */
1284*e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1285*e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1286*e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1287*e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1288*e240928fSHong Zhang       cnzi += nlnk;
1289*e240928fSHong Zhang     }
1290*e240928fSHong Zhang     /* rows of P_oth */
1291*e240928fSHong Zhang     ptaj = ptasparserow_oth;
1292*e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
1293*e240928fSHong Zhang       prow = *ptaj++;
1294*e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
1295*e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1296*e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1297*e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1298*e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1299*e240928fSHong Zhang       cnzi += nlnk;
1300*e240928fSHong Zhang     }
1301*e240928fSHong Zhang 
1302*e240928fSHong Zhang     /* If free space is not available, make more free space */
1303*e240928fSHong Zhang     /* Double the amount of total space in the list */
1304*e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
1305*e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1306*e240928fSHong Zhang     }
1307*e240928fSHong Zhang 
1308*e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
1309*e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1310*e240928fSHong Zhang     current_space->array           += cnzi;
1311*e240928fSHong Zhang     current_space->local_used      += cnzi;
1312*e240928fSHong Zhang     current_space->local_remaining -= cnzi;
1313*e240928fSHong Zhang 
1314*e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
1315*e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1316*e240928fSHong Zhang     }
1317*e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
1318*e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1319*e240928fSHong Zhang     }
1320*e240928fSHong Zhang 
1321*e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1322*e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
1323*e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
1324*e240928fSHong Zhang   }
1325*e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1326*e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
1327*e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
1328*e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1329*e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1330*e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1331*e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1332*e240928fSHong Zhang 
1333*e240928fSHong Zhang   /* Allocate space for ca */
1334*e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1335*e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1336*e240928fSHong Zhang 
1337*e240928fSHong Zhang   /* put together the new matrix */
1338*e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1339*e240928fSHong Zhang 
1340*e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1341*e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
1342*e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
1343*e240928fSHong Zhang   c->freedata = PETSC_TRUE;
1344*e240928fSHong Zhang   c->nonew    = 0;
1345*e240928fSHong Zhang 
1346*e240928fSHong Zhang   /* Clean up. */
1347*e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1348*e240928fSHong Zhang   /*
1349*e240928fSHong Zhang   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1350*e240928fSHong Zhang   if (rank == 1){
1351*e240928fSHong Zhang     printf("[%d] C: %d, %d\n",rank,(*C)->M,(*C)->N);
1352*e240928fSHong Zhang     ierr = MatView(*C, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
1353*e240928fSHong Zhang     } */
1354*e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1355*e240928fSHong Zhang   PetscFunctionReturn(0);
1356*e240928fSHong Zhang }
1357