xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 097ab81bd3f74b14d4b06ab00c8f3db64f59e618)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76e240928fSHong Zhang 
77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78a61c8c0fSHong Zhang #undef __FUNCT__
79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81a61c8c0fSHong Zhang {
82a61c8c0fSHong Zhang   PetscErrorCode       ierr;
8332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
84a61c8c0fSHong Zhang   PetscObjectContainer container;
85a61c8c0fSHong Zhang 
86a61c8c0fSHong Zhang   PetscFunctionBegin;
87a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88a61c8c0fSHong Zhang   if (container) {
89a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
9032fba14fSHong Zhang     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
9132fba14fSHong Zhang     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
9201b7ae99SHong Zhang     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
9301b7ae99SHong Zhang     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94a61c8c0fSHong Zhang 
95a61c8c0fSHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
9632fba14fSHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97a61c8c0fSHong Zhang   }
98a61c8c0fSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
99a61c8c0fSHong Zhang 
100a61c8c0fSHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101a61c8c0fSHong Zhang   PetscFunctionReturn(0);
102a61c8c0fSHong Zhang }
103a61c8c0fSHong Zhang 
104eb9c0419SKris Buschelman #undef __FUNCT__
105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
107ff134f7aSHong Zhang {
108ff134f7aSHong Zhang   PetscErrorCode       ierr;
10932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
110a61c8c0fSHong Zhang   PetscObjectContainer container;
111b90dcfe3SHong Zhang 
112b90dcfe3SHong Zhang   PetscFunctionBegin;
113b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1144c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
11532fba14fSHong Zhang     ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
11632fba14fSHong Zhang 
117e240928fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
11801b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
119a61c8c0fSHong Zhang 
120e240928fSHong Zhang     /* get P_loc by taking all local rows of P */
12132fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
122e240928fSHong Zhang 
123a61c8c0fSHong Zhang     /* attach the supporting struct to P for reuse */
124a61c8c0fSHong Zhang     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
125a61c8c0fSHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
126a61c8c0fSHong Zhang     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
127a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
128e240928fSHong Zhang 
12932fba14fSHong Zhang     /* now, compute symbolic local P^T*A*P */
130a61c8c0fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
131a61c8c0fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
132a61c8c0fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
133a61c8c0fSHong Zhang     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
134a61c8c0fSHong Zhang     if (container) {
135a61c8c0fSHong Zhang       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
136a61c8c0fSHong Zhang     } else {
137a61c8c0fSHong Zhang       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
138a61c8c0fSHong Zhang     }
139a61c8c0fSHong Zhang 
140a61c8c0fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
14101b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
142a61c8c0fSHong Zhang 
143a61c8c0fSHong Zhang     /* get P_loc by taking all local rows of P */
14432fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
14532fba14fSHong Zhang 
146a61c8c0fSHong Zhang   } else {
147a61c8c0fSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
148a61c8c0fSHong Zhang   }
14932fba14fSHong Zhang   /* now, compute numeric local P^T*A*P */
150a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
151a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
152a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
153a61c8c0fSHong Zhang 
154a61c8c0fSHong Zhang   PetscFunctionReturn(0);
155a61c8c0fSHong Zhang }
156a61c8c0fSHong Zhang 
157a61c8c0fSHong Zhang #undef __FUNCT__
158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160a61c8c0fSHong Zhang {
161a61c8c0fSHong Zhang   PetscErrorCode       ierr;
162a61c8c0fSHong Zhang   Mat                  C_seq;
16332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
164a61c8c0fSHong Zhang   PetscObjectContainer container;
165a61c8c0fSHong Zhang 
166a61c8c0fSHong Zhang   PetscFunctionBegin;
167a61c8c0fSHong Zhang 
168a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169a61c8c0fSHong Zhang   if (container) {
170a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171a61c8c0fSHong Zhang   } else {
172a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173a61c8c0fSHong Zhang   }
17425616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
17532fba14fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176a61c8c0fSHong Zhang 
177b90dcfe3SHong Zhang   /* add C_seq into mpi C */
178a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179b90dcfe3SHong Zhang 
180ff134f7aSHong Zhang   PetscFunctionReturn(0);
181ff134f7aSHong Zhang }
182ff134f7aSHong Zhang 
183ff134f7aSHong Zhang #undef __FUNCT__
184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
186ff134f7aSHong Zhang {
187b90dcfe3SHong Zhang   PetscErrorCode       ierr;
188671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
18932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
190a61c8c0fSHong Zhang   PetscObjectContainer cont_merge,cont_ptap;
191ff134f7aSHong Zhang 
192*097ab81bSHong Zhang   PetscInt       flops=0;
193*097ab81bSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
194*097ab81bSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
195*097ab81bSHong Zhang   Mat            C_seq;
196*097ab81bSHong Zhang   Mat_SeqAIJ     *c,*p_loc,*p_oth;
197*097ab81bSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
198*097ab81bSHong Zhang   PetscInt       *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
199*097ab81bSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
200*097ab81bSHong Zhang   PetscInt       *cjj;
201*097ab81bSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
202*097ab81bSHong Zhang   MatScalar      *pa_loc,*pA,*pa_oth;
203*097ab81bSHong Zhang   PetscInt       am=A->m,cN=C->N;
204*097ab81bSHong Zhang 
205*097ab81bSHong Zhang   MPI_Comm             comm=C->comm;
206*097ab81bSHong Zhang   PetscMPIInt          size,rank,taga,*len_s;
207*097ab81bSHong Zhang   PetscInt             *owners;
208*097ab81bSHong Zhang   PetscInt             proc;
209*097ab81bSHong Zhang   PetscInt             **buf_ri,**buf_rj;
210*097ab81bSHong Zhang   PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
211*097ab81bSHong Zhang   PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
212*097ab81bSHong Zhang   MPI_Request          *s_waits,*r_waits;
213*097ab81bSHong Zhang   MPI_Status           *status;
214*097ab81bSHong Zhang   MatScalar            **abuf_r,*ba_i;
215*097ab81bSHong Zhang   Mat_SeqAIJ           *cseq;
216*097ab81bSHong Zhang   PetscInt             *cseqi,*cseqj;
217*097ab81bSHong Zhang 
218ff134f7aSHong Zhang   PetscFunctionBegin;
219a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
220a61c8c0fSHong Zhang   if (cont_merge) {
221a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
2227f79fd58SMatthew Knepley   } else {
223a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
224671beff6SHong Zhang   }
225a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
226a61c8c0fSHong Zhang   if (cont_ptap) {
227a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
228a61c8c0fSHong Zhang   } else {
229a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
230a61c8c0fSHong Zhang   }
231*097ab81bSHong Zhang #ifdef OLD
23232fba14fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
233*097ab81bSHong Zhang #endif /* OLD */
234*097ab81bSHong Zhang    /*--------------------------------------------------------------*/
235*097ab81bSHong Zhang 
236*097ab81bSHong Zhang   p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
237*097ab81bSHong Zhang   p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
238*097ab81bSHong Zhang   pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
239*097ab81bSHong Zhang   pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;
240*097ab81bSHong Zhang 
241*097ab81bSHong Zhang   C_seq=merge->C_seq;
242*097ab81bSHong Zhang   cseq=(Mat_SeqAIJ*)C_seq->data;
243*097ab81bSHong Zhang   cseqi=cseq->i; cseqj=cseq->j;
244*097ab81bSHong Zhang   cseqa=cseq->a;
245*097ab81bSHong Zhang 
246*097ab81bSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
247*097ab81bSHong Zhang   ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
248*097ab81bSHong Zhang   ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
249*097ab81bSHong Zhang   apj      = (PetscInt *)(apa + cN);
250*097ab81bSHong Zhang   apjdense = apj + cN;
251*097ab81bSHong Zhang 
252*097ab81bSHong Zhang   /* Clear old values in C_Seq */
253*097ab81bSHong Zhang   ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
254*097ab81bSHong Zhang 
255*097ab81bSHong Zhang   for (i=0;i<am;i++) {
256*097ab81bSHong Zhang     /* Form i-th sparse row of A*P */
257*097ab81bSHong Zhang      apnzj = 0;
258*097ab81bSHong Zhang     /* diagonal portion of A */
259*097ab81bSHong Zhang     anzi  = adi[i+1] - adi[i];
260*097ab81bSHong Zhang     for (j=0;j<anzi;j++) {
261*097ab81bSHong Zhang       prow = *adj;
262*097ab81bSHong Zhang       adj++;
263*097ab81bSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
264*097ab81bSHong Zhang       pjj  = pj_loc + pi_loc[prow];
265*097ab81bSHong Zhang       paj  = pa_loc + pi_loc[prow];
266*097ab81bSHong Zhang       for (k=0;k<pnzj;k++) {
267*097ab81bSHong Zhang         if (!apjdense[pjj[k]]) {
268*097ab81bSHong Zhang           apjdense[pjj[k]] = -1;
269*097ab81bSHong Zhang           apj[apnzj++]     = pjj[k];
270*097ab81bSHong Zhang         }
271*097ab81bSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
272*097ab81bSHong Zhang       }
273*097ab81bSHong Zhang       flops += 2*pnzj;
274*097ab81bSHong Zhang       ada++;
275*097ab81bSHong Zhang     }
276*097ab81bSHong Zhang     /* off-diagonal portion of A */
277*097ab81bSHong Zhang     anzi  = aoi[i+1] - aoi[i];
278*097ab81bSHong Zhang     for (j=0;j<anzi;j++) {
279*097ab81bSHong Zhang       col = a->garray[*aoj];
280*097ab81bSHong Zhang       if (col < cstart){
281*097ab81bSHong Zhang         prow = *aoj;
282*097ab81bSHong Zhang       } else if (col >= cend){
283*097ab81bSHong Zhang         prow = *aoj;
284*097ab81bSHong Zhang       } else {
285*097ab81bSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
286*097ab81bSHong Zhang       }
287*097ab81bSHong Zhang       aoj++;
288*097ab81bSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
289*097ab81bSHong Zhang       pjj  = pj_oth + pi_oth[prow];
290*097ab81bSHong Zhang       paj  = pa_oth + pi_oth[prow];
291*097ab81bSHong Zhang       for (k=0;k<pnzj;k++) {
292*097ab81bSHong Zhang         if (!apjdense[pjj[k]]) {
293*097ab81bSHong Zhang           apjdense[pjj[k]] = -1;
294*097ab81bSHong Zhang           apj[apnzj++]     = pjj[k];
295*097ab81bSHong Zhang         }
296*097ab81bSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
297*097ab81bSHong Zhang       }
298*097ab81bSHong Zhang       flops += 2*pnzj;
299*097ab81bSHong Zhang       aoa++;
300*097ab81bSHong Zhang     }
301*097ab81bSHong Zhang     /* Sort the j index array for quick sparse axpy. */
302*097ab81bSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
303*097ab81bSHong Zhang 
304*097ab81bSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
305*097ab81bSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
306*097ab81bSHong Zhang     for (j=0;j<pnzi;j++) {
307*097ab81bSHong Zhang       nextap = 0;
308*097ab81bSHong Zhang       crow   = *pJ++;
309*097ab81bSHong Zhang       cjj    = cseqj + cseqi[crow];
310*097ab81bSHong Zhang       caj    = cseqa + cseqi[crow];
311*097ab81bSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
312*097ab81bSHong Zhang       for (k=0;nextap<apnzj;k++) {
313*097ab81bSHong Zhang         if (cjj[k]==apj[nextap]) {
314*097ab81bSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
315*097ab81bSHong Zhang         }
316*097ab81bSHong Zhang       }
317*097ab81bSHong Zhang       flops += 2*apnzj;
318*097ab81bSHong Zhang       pA++;
319*097ab81bSHong Zhang     }
320*097ab81bSHong Zhang 
321*097ab81bSHong Zhang     /* Zero the current row info for A*P */
322*097ab81bSHong Zhang     for (j=0;j<apnzj;j++) {
323*097ab81bSHong Zhang       apa[apj[j]]      = 0.;
324*097ab81bSHong Zhang       apjdense[apj[j]] = 0;
325*097ab81bSHong Zhang     }
326*097ab81bSHong Zhang   }
327*097ab81bSHong Zhang 
328*097ab81bSHong Zhang   /* Assemble the final matrix and clean up */
329*097ab81bSHong Zhang   ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
330*097ab81bSHong Zhang   ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
331*097ab81bSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
332*097ab81bSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
333*097ab81bSHong Zhang 
334*097ab81bSHong Zhang #ifdef OLD1
335a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
336*097ab81bSHong Zhang #endif /* OLD1 */
337*097ab81bSHong Zhang   /*--------------------------------------------------------------*/
338*097ab81bSHong Zhang 
339*097ab81bSHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
340*097ab81bSHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
341*097ab81bSHong Zhang 
342*097ab81bSHong Zhang   bi     = merge->bi;
343*097ab81bSHong Zhang   bj     = merge->bj;
344*097ab81bSHong Zhang   buf_ri = merge->buf_ri;
345*097ab81bSHong Zhang   buf_rj = merge->buf_rj;
346*097ab81bSHong Zhang 
347*097ab81bSHong Zhang   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
348*097ab81bSHong Zhang   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
349*097ab81bSHong Zhang 
350*097ab81bSHong Zhang   /* send and recv matrix values */
351*097ab81bSHong Zhang   /*-----------------------------*/
352*097ab81bSHong Zhang   len_s  = merge->len_s;
353*097ab81bSHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
354*097ab81bSHong Zhang   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
355*097ab81bSHong Zhang 
356*097ab81bSHong Zhang   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
357*097ab81bSHong Zhang   for (proc=0,k=0; proc<size; proc++){
358*097ab81bSHong Zhang     if (!len_s[proc]) continue;
359*097ab81bSHong Zhang     i = owners[proc];
360*097ab81bSHong Zhang     ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
361*097ab81bSHong Zhang     k++;
362*097ab81bSHong Zhang   }
363*097ab81bSHong Zhang 
364*097ab81bSHong Zhang   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
365*097ab81bSHong Zhang   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
366*097ab81bSHong Zhang   ierr = PetscFree(status);CHKERRQ(ierr);
367*097ab81bSHong Zhang 
368*097ab81bSHong Zhang   ierr = PetscFree(s_waits);CHKERRQ(ierr);
369*097ab81bSHong Zhang   ierr = PetscFree(r_waits);CHKERRQ(ierr);
370*097ab81bSHong Zhang 
371*097ab81bSHong Zhang   /* insert mat values of mpimat */
372*097ab81bSHong Zhang   /*----------------------------*/
373*097ab81bSHong Zhang   ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
374*097ab81bSHong Zhang   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
375*097ab81bSHong Zhang   nextrow = buf_ri_k + merge->nrecv;
376*097ab81bSHong Zhang   nextcseqi  = nextrow + merge->nrecv;
377*097ab81bSHong Zhang 
378*097ab81bSHong Zhang   for (k=0; k<merge->nrecv; k++){
379*097ab81bSHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
380*097ab81bSHong Zhang     nrows = *(buf_ri_k[k]);
381*097ab81bSHong Zhang     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
382*097ab81bSHong Zhang     nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
383*097ab81bSHong Zhang   }
384*097ab81bSHong Zhang 
385*097ab81bSHong Zhang   /* set values of ba */
386*097ab81bSHong Zhang   for (i=0; i<C->m; i++) {
387*097ab81bSHong Zhang     cseqrow = owners[rank] + i;
388*097ab81bSHong Zhang     bj_i = bj+bi[i];  /* col indices of the i-th row of C */
389*097ab81bSHong Zhang     bnzi = bi[i+1] - bi[i];
390*097ab81bSHong Zhang     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
391*097ab81bSHong Zhang 
392*097ab81bSHong Zhang     /* add local non-zero vals of this proc's C_seq into ba */
393*097ab81bSHong Zhang     cseqnzi = cseqi[cseqrow+1] - cseqi[cseqrow];
394*097ab81bSHong Zhang     cseqj   = cseq->j + cseqi[cseqrow];
395*097ab81bSHong Zhang     cseqa   = cseq->a + cseqi[cseqrow];
396*097ab81bSHong Zhang     nextcseqj = 0;
397*097ab81bSHong Zhang     for (j=0; nextcseqj<cseqnzi; j++){
398*097ab81bSHong Zhang       if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
399*097ab81bSHong Zhang         ba_i[j] += cseqa[nextcseqj++];
400*097ab81bSHong Zhang       }
401*097ab81bSHong Zhang     }
402*097ab81bSHong Zhang 
403*097ab81bSHong Zhang     /* add received vals into ba */
404*097ab81bSHong Zhang     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
405*097ab81bSHong Zhang       /* i-th row */
406*097ab81bSHong Zhang       if (i == *nextrow[k]) {
407*097ab81bSHong Zhang         cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
408*097ab81bSHong Zhang         cseqj   = buf_rj[k] + *(nextcseqi[k]);
409*097ab81bSHong Zhang         cseqa   = abuf_r[k] + *(nextcseqi[k]);
410*097ab81bSHong Zhang         nextcseqj = 0;
411*097ab81bSHong Zhang         for (j=0; nextcseqj<cseqnzi; j++){
412*097ab81bSHong Zhang           if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
413*097ab81bSHong Zhang             ba_i[j] += cseqa[nextcseqj++];
414*097ab81bSHong Zhang           }
415*097ab81bSHong Zhang         }
416*097ab81bSHong Zhang         nextrow[k]++; nextcseqi[k]++;
417*097ab81bSHong Zhang       }
418*097ab81bSHong Zhang     }
419*097ab81bSHong Zhang     ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
420*097ab81bSHong Zhang   }
421*097ab81bSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
422*097ab81bSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
423*097ab81bSHong Zhang 
424*097ab81bSHong Zhang   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
425*097ab81bSHong Zhang   ierr = PetscFree(ba_i);CHKERRQ(ierr);
426*097ab81bSHong Zhang   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
427*097ab81bSHong Zhang 
428ff134f7aSHong Zhang   PetscFunctionReturn(0);
429ff134f7aSHong Zhang }
430ff134f7aSHong Zhang 
431ff134f7aSHong Zhang #undef __FUNCT__
4329af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
433dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
4349af31e4aSHong Zhang {
435dfbe8321SBarry Smith   PetscErrorCode ierr;
436b1d57f15SBarry Smith 
4379af31e4aSHong Zhang   PetscFunctionBegin;
4389af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
439d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
4409af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
441d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
4429af31e4aSHong Zhang   }
443d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
4449af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
445d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
4469af31e4aSHong Zhang   PetscFunctionReturn(0);
4479af31e4aSHong Zhang }
4489af31e4aSHong Zhang 
4496849ba73SBarry Smith /*
4509af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
4514d3841fdSKris Buschelman 
4524d3841fdSKris Buschelman    Collective on Mat
4534d3841fdSKris Buschelman 
4544d3841fdSKris Buschelman    Input Parameters:
4554d3841fdSKris Buschelman +  A - the matrix
4564d3841fdSKris Buschelman -  P - the projection matrix
4574d3841fdSKris Buschelman 
4584d3841fdSKris Buschelman    Output Parameters:
4594d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
4604d3841fdSKris Buschelman 
4614d3841fdSKris Buschelman    Notes:
4624d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
4634d3841fdSKris Buschelman 
4644d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
4654d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
4669af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
4674d3841fdSKris Buschelman 
4684d3841fdSKris Buschelman    Level: intermediate
4694d3841fdSKris Buschelman 
4709af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
4716849ba73SBarry Smith */
472e240928fSHong Zhang #undef __FUNCT__
473e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
47455a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
47555a3bba9SHong Zhang {
476dfbe8321SBarry Smith   PetscErrorCode ierr;
477534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
478534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
479eb9c0419SKris Buschelman 
480eb9c0419SKris Buschelman   PetscFunctionBegin;
481eb9c0419SKris Buschelman 
4824482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
483c9780b6fSBarry Smith   PetscValidType(A,1);
484eb9c0419SKris Buschelman   MatPreallocated(A);
485eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
486eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
487eb9c0419SKris Buschelman 
4884482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
489c9780b6fSBarry Smith   PetscValidType(P,2);
490eb9c0419SKris Buschelman   MatPreallocated(P);
491eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
492eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
493eb9c0419SKris Buschelman 
4944482741eSBarry Smith   PetscValidPointer(C,3);
4954482741eSBarry Smith 
496eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
497eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
498eb9c0419SKris Buschelman 
499534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
500534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
501534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
502534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
503534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
504534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
505534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5064d3841fdSKris Buschelman 
507534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
508534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
509534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
510eb9c0419SKris Buschelman 
511eb9c0419SKris Buschelman   PetscFunctionReturn(0);
512eb9c0419SKris Buschelman }
513eb9c0419SKris Buschelman 
514f747e1aeSHong Zhang typedef struct {
515f747e1aeSHong Zhang   Mat    symAP;
516f747e1aeSHong Zhang } Mat_PtAPstruct;
517f747e1aeSHong Zhang 
51878a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
51978a80504SBarry Smith 
520f747e1aeSHong Zhang #undef __FUNCT__
521f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
522f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
523f747e1aeSHong Zhang {
524f4a850bbSBarry Smith   PetscErrorCode    ierr;
525f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
526f747e1aeSHong Zhang 
527f747e1aeSHong Zhang   PetscFunctionBegin;
528f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
529f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
53078a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
531f747e1aeSHong Zhang   PetscFunctionReturn(0);
532f747e1aeSHong Zhang }
533f747e1aeSHong Zhang 
534eb9c0419SKris Buschelman #undef __FUNCT__
5359af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
53655a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
53755a3bba9SHong Zhang {
538dfbe8321SBarry Smith   PetscErrorCode ierr;
539d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
540d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
54155a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
542b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
54355a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
544b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
545d20bfe6fSHong Zhang   MatScalar      *ca;
54655a3bba9SHong Zhang   PetscBT        lnkbt;
547eb9c0419SKris Buschelman 
548eb9c0419SKris Buschelman   PetscFunctionBegin;
549d20bfe6fSHong Zhang   /* Get ij structure of P^T */
550eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
551d20bfe6fSHong Zhang   ptJ=ptj;
552eb9c0419SKris Buschelman 
553d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
554d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
55555a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
556d20bfe6fSHong Zhang   ci[0] = 0;
557eb9c0419SKris Buschelman 
55855a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
55955a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
560d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
56155a3bba9SHong Zhang 
56255a3bba9SHong Zhang   /* create and initialize a linked list */
56355a3bba9SHong Zhang   nlnk = pn+1;
56455a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
565eb9c0419SKris Buschelman 
566d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
567d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
568d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
569d20bfe6fSHong Zhang   current_space = free_space;
570d20bfe6fSHong Zhang 
571d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
572d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
573d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
574d20bfe6fSHong Zhang     ptanzi = 0;
575d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
576d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
577d20bfe6fSHong Zhang       arow = *ptJ++;
578d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
579d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
580d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
581d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
582d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
583d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
584d20bfe6fSHong Zhang         }
585d20bfe6fSHong Zhang       }
586d20bfe6fSHong Zhang     }
587d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
588d20bfe6fSHong Zhang     ptaj = ptasparserow;
589d20bfe6fSHong Zhang     cnzi   = 0;
590d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
591d20bfe6fSHong Zhang       prow = *ptaj++;
592d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
593d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
59455a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
59555a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
59655a3bba9SHong Zhang       cnzi += nlnk;
597d20bfe6fSHong Zhang     }
598d20bfe6fSHong Zhang 
599d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
600d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
601d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
602d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
603d20bfe6fSHong Zhang     }
604d20bfe6fSHong Zhang 
605d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
60655a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
607d20bfe6fSHong Zhang     current_space->array           += cnzi;
608d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
609d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
610d20bfe6fSHong Zhang 
611d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
612d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
613d20bfe6fSHong Zhang     }
614d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
615d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
616d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
617d20bfe6fSHong Zhang   }
618d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
619d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
620d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
62155a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
622d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
623d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
62455a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
625d20bfe6fSHong Zhang 
626d20bfe6fSHong Zhang   /* Allocate space for ca */
627d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
628d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
629d20bfe6fSHong Zhang 
630d20bfe6fSHong Zhang   /* put together the new matrix */
631d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
632d20bfe6fSHong Zhang 
633d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
634d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
635d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
636d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
637d20bfe6fSHong Zhang   c->nonew    = 0;
638d20bfe6fSHong Zhang 
639d20bfe6fSHong Zhang   /* Clean up. */
640d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
641eb9c0419SKris Buschelman 
642eb9c0419SKris Buschelman   PetscFunctionReturn(0);
643eb9c0419SKris Buschelman }
644eb9c0419SKris Buschelman 
6453985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
6463985e5eaSKris Buschelman EXTERN_C_BEGIN
6473985e5eaSKris Buschelman #undef __FUNCT__
6489af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
64955a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
65055a3bba9SHong Zhang {
6515c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
652dfbe8321SBarry Smith   PetscErrorCode ierr;
6533985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
6543985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
6553985e5eaSKris Buschelman   Mat            P=pp->AIJ;
6563985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
657b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
658b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
659b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
660b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
6613985e5eaSKris Buschelman   MatScalar      *ca;
6623985e5eaSKris Buschelman 
6633985e5eaSKris Buschelman   PetscFunctionBegin;
6643985e5eaSKris Buschelman   /* Start timer */
6659af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
6663985e5eaSKris Buschelman 
6673985e5eaSKris Buschelman   /* Get ij structure of P^T */
6683985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
6693985e5eaSKris Buschelman 
6703985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
6713985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
672b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
6733985e5eaSKris Buschelman   ci[0] = 0;
6743985e5eaSKris Buschelman 
675b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
676b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
6773985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
6783985e5eaSKris Buschelman   denserow     = ptasparserow + an;
6793985e5eaSKris Buschelman   sparserow    = denserow     + pn;
6803985e5eaSKris Buschelman 
6813985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
6823985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
6833985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
6843985e5eaSKris Buschelman   current_space = free_space;
6853985e5eaSKris Buschelman 
6863985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
6873985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
6883985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
6893985e5eaSKris Buschelman     ptanzi = 0;
6903985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
6913985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
6923985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
6933985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
6943985e5eaSKris Buschelman         arow = ptJ[j] + dof;
6953985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
6963985e5eaSKris Buschelman         ajj  = aj + ai[arow];
6973985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
6983985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
6993985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
7003985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
7013985e5eaSKris Buschelman           }
7023985e5eaSKris Buschelman         }
7033985e5eaSKris Buschelman       }
7043985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7053985e5eaSKris Buschelman       ptaj = ptasparserow;
7063985e5eaSKris Buschelman       cnzi   = 0;
7073985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
708fe05a634SKris Buschelman         pdof = *ptaj%dof;
7093985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
7103985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
7113985e5eaSKris Buschelman         pjj  = pj + pi[prow];
7123985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
713fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
714fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
715fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
7163985e5eaSKris Buschelman           }
7173985e5eaSKris Buschelman         }
7183985e5eaSKris Buschelman       }
7193985e5eaSKris Buschelman 
7203985e5eaSKris Buschelman       /* sort sparserow */
7213985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
7223985e5eaSKris Buschelman 
7233985e5eaSKris Buschelman       /* If free space is not available, make more free space */
7243985e5eaSKris Buschelman       /* Double the amount of total space in the list */
7253985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
7263985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
7273985e5eaSKris Buschelman       }
7283985e5eaSKris Buschelman 
7293985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
730b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
7313985e5eaSKris Buschelman       current_space->array           += cnzi;
7323985e5eaSKris Buschelman       current_space->local_used      += cnzi;
7333985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
7343985e5eaSKris Buschelman 
7353985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
7363985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
7373985e5eaSKris Buschelman       }
7383985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
7393985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
7403985e5eaSKris Buschelman       }
7413985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
7423985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
7433985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
7443985e5eaSKris Buschelman     }
7453985e5eaSKris Buschelman   }
7463985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
7473985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
7483985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
749b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
7503985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
7513985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
7523985e5eaSKris Buschelman 
7533985e5eaSKris Buschelman   /* Allocate space for ca */
7543985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
7553985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
7563985e5eaSKris Buschelman 
7573985e5eaSKris Buschelman   /* put together the new matrix */
7583985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
7593985e5eaSKris Buschelman 
7603985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7613985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
7623985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
7633985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
7643985e5eaSKris Buschelman   c->nonew    = 0;
7653985e5eaSKris Buschelman 
7663985e5eaSKris Buschelman   /* Clean up. */
7673985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
7683985e5eaSKris Buschelman 
7699af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
7703985e5eaSKris Buschelman   PetscFunctionReturn(0);
7713985e5eaSKris Buschelman }
7723985e5eaSKris Buschelman EXTERN_C_END
7733985e5eaSKris Buschelman 
7746849ba73SBarry Smith /*
7759af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
7764d3841fdSKris Buschelman 
7774d3841fdSKris Buschelman    Collective on Mat
7784d3841fdSKris Buschelman 
7794d3841fdSKris Buschelman    Input Parameters:
7804d3841fdSKris Buschelman +  A - the matrix
7814d3841fdSKris Buschelman -  P - the projection matrix
7824d3841fdSKris Buschelman 
7834d3841fdSKris Buschelman    Output Parameters:
7844d3841fdSKris Buschelman .  C - the product matrix
7854d3841fdSKris Buschelman 
7864d3841fdSKris Buschelman    Notes:
7879af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
7884d3841fdSKris Buschelman    the user using MatDeatroy().
7894d3841fdSKris Buschelman 
790170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
791170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
7924d3841fdSKris Buschelman 
7934d3841fdSKris Buschelman    Level: intermediate
7944d3841fdSKris Buschelman 
7959af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
7966849ba73SBarry Smith */
797e240928fSHong Zhang #undef __FUNCT__
798e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
79955a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
80055a3bba9SHong Zhang {
801dfbe8321SBarry Smith   PetscErrorCode ierr;
802534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
803534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
804eb9c0419SKris Buschelman 
805eb9c0419SKris Buschelman   PetscFunctionBegin;
806eb9c0419SKris Buschelman 
8074482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
808c9780b6fSBarry Smith   PetscValidType(A,1);
809eb9c0419SKris Buschelman   MatPreallocated(A);
810eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
811eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
812eb9c0419SKris Buschelman 
8134482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
814c9780b6fSBarry Smith   PetscValidType(P,2);
815eb9c0419SKris Buschelman   MatPreallocated(P);
816eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
817eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
818eb9c0419SKris Buschelman 
8194482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
820c9780b6fSBarry Smith   PetscValidType(C,3);
821eb9c0419SKris Buschelman   MatPreallocated(C);
822eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
823eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
824eb9c0419SKris Buschelman 
825eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
826eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
827eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
828eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
829eb9c0419SKris Buschelman 
830534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
831534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
832534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
833534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
834534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
835534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
836534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
8374d3841fdSKris Buschelman 
838534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
839534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
840534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
841eb9c0419SKris Buschelman 
842eb9c0419SKris Buschelman   PetscFunctionReturn(0);
843eb9c0419SKris Buschelman }
844eb9c0419SKris Buschelman 
845eb9c0419SKris Buschelman #undef __FUNCT__
8469af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
847dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
848dfbe8321SBarry Smith {
849dfbe8321SBarry Smith   PetscErrorCode ierr;
850b1d57f15SBarry Smith   PetscInt       flops=0;
851d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
852d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
853d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
854b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
855b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
856b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
857b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
858d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
859eb9c0419SKris Buschelman 
860eb9c0419SKris Buschelman   PetscFunctionBegin;
861d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
862b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
863b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
864eb9c0419SKris Buschelman 
865b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
866d20bfe6fSHong Zhang   apjdense = apj + cn;
867d20bfe6fSHong Zhang 
868d20bfe6fSHong Zhang   /* Clear old values in C */
869d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
870d20bfe6fSHong Zhang 
871d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
872d20bfe6fSHong Zhang     /* Form sparse row of A*P */
873d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
874d20bfe6fSHong Zhang     apnzj = 0;
875d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
876d20bfe6fSHong Zhang       prow = *aj++;
877d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
878d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
879d20bfe6fSHong Zhang       paj  = pa + pi[prow];
880d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
881d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
882d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
883d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
884d20bfe6fSHong Zhang         }
885d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
886d20bfe6fSHong Zhang       }
887d20bfe6fSHong Zhang       flops += 2*pnzj;
888d20bfe6fSHong Zhang       aa++;
889d20bfe6fSHong Zhang     }
890d20bfe6fSHong Zhang 
891d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
892d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
893d20bfe6fSHong Zhang 
894d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
895d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
896d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
897d20bfe6fSHong Zhang       nextap = 0;
898d20bfe6fSHong Zhang       crow   = *pJ++;
899d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
900d20bfe6fSHong Zhang       caj    = ca + ci[crow];
901d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
902d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
903d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
904d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
905d20bfe6fSHong Zhang         }
906d20bfe6fSHong Zhang       }
907d20bfe6fSHong Zhang       flops += 2*apnzj;
908d20bfe6fSHong Zhang       pA++;
909d20bfe6fSHong Zhang     }
910d20bfe6fSHong Zhang 
911d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
912d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
913d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
914d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
915d20bfe6fSHong Zhang     }
916d20bfe6fSHong Zhang   }
917d20bfe6fSHong Zhang 
918d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
919d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
920d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
921d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
922d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
923d20bfe6fSHong Zhang 
924eb9c0419SKris Buschelman   PetscFunctionReturn(0);
925eb9c0419SKris Buschelman }
9260e36024fSHong Zhang 
927a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
928e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
929e240928fSHong Zhang #undef __FUNCT__
930e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
931e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
932e240928fSHong Zhang {
933e240928fSHong Zhang   PetscErrorCode ierr;
934e240928fSHong Zhang   PetscInt       flops=0;
935e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
936e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
937e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
938e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
939e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
940e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
941e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
942e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
943e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
944e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
945e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
946e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
947e240928fSHong Zhang 
948e240928fSHong Zhang   PetscFunctionBegin;
949e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
950a61c8c0fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
951e240928fSHong Zhang   }
952a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
953e240928fSHong Zhang 
954e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
955e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
956e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
957e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
958e240928fSHong Zhang   apjdense = apj + cn;
959e240928fSHong Zhang 
960e240928fSHong Zhang   /* Clear old values in C */
961e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
962e240928fSHong Zhang 
963e240928fSHong Zhang   for (i=0;i<am;i++) {
964e240928fSHong Zhang     /* Form i-th sparse row of A*P */
965e240928fSHong Zhang      apnzj = 0;
966e240928fSHong Zhang     /* diagonal portion of A */
967e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
968e240928fSHong Zhang     for (j=0;j<anzi;j++) {
969e240928fSHong Zhang       prow = *adj;
970e240928fSHong Zhang       adj++;
971e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
972e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
973e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
974e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
975e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
976e240928fSHong Zhang           apjdense[pjj[k]] = -1;
977e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
978e240928fSHong Zhang         }
979e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
980e240928fSHong Zhang       }
981e240928fSHong Zhang       flops += 2*pnzj;
982e240928fSHong Zhang       ada++;
983e240928fSHong Zhang     }
984e240928fSHong Zhang     /* off-diagonal portion of A */
985e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
986e240928fSHong Zhang     for (j=0;j<anzi;j++) {
987e240928fSHong Zhang       col = a->garray[*aoj];
988e240928fSHong Zhang       if (col < cstart){
989e240928fSHong Zhang         prow = *aoj;
990e240928fSHong Zhang       } else if (col >= cend){
991e240928fSHong Zhang         prow = *aoj;
992e240928fSHong Zhang       } else {
993e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
994e240928fSHong Zhang       }
995e240928fSHong Zhang       aoj++;
996e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
997e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
998e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
999e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1000e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1001e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1002e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1003e240928fSHong Zhang         }
1004e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
1005e240928fSHong Zhang       }
1006e240928fSHong Zhang       flops += 2*pnzj;
1007e240928fSHong Zhang       aoa++;
1008e240928fSHong Zhang     }
1009e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
1010e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1011e240928fSHong Zhang 
1012e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1013e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
1014e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
1015e240928fSHong Zhang       nextap = 0;
1016e240928fSHong Zhang       crow   = *pJ++;
1017e240928fSHong Zhang       cjj    = cj + ci[crow];
1018e240928fSHong Zhang       caj    = ca + ci[crow];
1019e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
1020e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
1021e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
1022e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
1023e240928fSHong Zhang         }
1024e240928fSHong Zhang       }
1025e240928fSHong Zhang       flops += 2*apnzj;
1026e240928fSHong Zhang       pA++;
1027e240928fSHong Zhang     }
1028e240928fSHong Zhang 
1029e240928fSHong Zhang     /* Zero the current row info for A*P */
1030e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
1031e240928fSHong Zhang       apa[apj[j]]      = 0.;
1032e240928fSHong Zhang       apjdense[apj[j]] = 0;
1033e240928fSHong Zhang     }
1034e240928fSHong Zhang   }
1035e240928fSHong Zhang 
1036e240928fSHong Zhang   /* Assemble the final matrix and clean up */
1037e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
1040e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1041a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
1042e240928fSHong Zhang   PetscFunctionReturn(0);
1043e240928fSHong Zhang }
1044e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
1045e240928fSHong Zhang #undef __FUNCT__
1046e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1047e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1048e240928fSHong Zhang {
1049e240928fSHong Zhang   PetscErrorCode ierr;
1050e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1051e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1052e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1053e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1054e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1055e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
10566abd8857SHong Zhang   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1057e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1058e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
1059e240928fSHong Zhang   MatScalar      *ca;
1060e240928fSHong Zhang   PetscBT        lnkbt;
1061e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
1062e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1063e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1064e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1065e240928fSHong Zhang 
1066e240928fSHong Zhang   PetscFunctionBegin;
1067e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
1068e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1069e240928fSHong Zhang   }
1070e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1071e240928fSHong Zhang 
1072e240928fSHong Zhang   prend = prstart + pm;
1073e240928fSHong Zhang 
1074e240928fSHong Zhang   /* get ij structure of P_loc^T */
1075e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1076e240928fSHong Zhang   ptJ=ptj;
1077e240928fSHong Zhang 
1078e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
1079e240928fSHong Zhang   /* free space for accumulating nonzero column info */
1080e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1081e240928fSHong Zhang   ci[0] = 0;
1082e240928fSHong Zhang 
10836abd8857SHong Zhang   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
10846abd8857SHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
10856abd8857SHong Zhang   ptasparserow_loc = ptadenserow_loc + aN;
10866abd8857SHong Zhang   ptadenserow_oth  = ptasparserow_loc + aN;
10876abd8857SHong Zhang   ptasparserow_oth = ptadenserow_oth + aN;
1088e240928fSHong Zhang 
1089e240928fSHong Zhang   /* create and initialize a linked list */
1090e240928fSHong Zhang   nlnk = pN+1;
1091e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1092e240928fSHong Zhang 
10936abd8857SHong Zhang   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1094e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1095e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
10966abd8857SHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1097e240928fSHong Zhang   current_space = free_space;
1098e240928fSHong Zhang 
1099e240928fSHong Zhang   /* determine symbolic info for each row of C: */
1100e240928fSHong Zhang   for (i=0;i<pN;i++) {
1101e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
1102e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
1103e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
1104e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
1105e240928fSHong Zhang       arow = *ptJ++;
1106e240928fSHong Zhang       /* diagonal portion of A */
1107e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
1108e240928fSHong Zhang       ajj  = adj + adi[arow];
1109e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1110e240928fSHong Zhang         col = ajj[k]+prstart;
1111e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
1112e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
1113e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
1114e240928fSHong Zhang         }
1115e240928fSHong Zhang       }
1116e240928fSHong Zhang       /* off-diagonal portion of A */
1117e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
1118e240928fSHong Zhang       ajj  = aoj + aoi[arow];
1119e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1120e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
1121e240928fSHong Zhang         if (col < cstart){
1122e240928fSHong Zhang           col = ajj[k];
1123e240928fSHong Zhang         } else if (col >= cend){
1124e240928fSHong Zhang           col = ajj[k] + pm;
1125e240928fSHong Zhang         } else {
1126e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1127e240928fSHong Zhang         }
1128e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
1129e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
1130e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
1131e240928fSHong Zhang         }
1132e240928fSHong Zhang       }
1133e240928fSHong Zhang     }
1134e240928fSHong Zhang 
1135e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1136e240928fSHong Zhang     cnzi   = 0;
1137e240928fSHong Zhang     /* rows of P_loc */
1138e240928fSHong Zhang     ptaj = ptasparserow_loc;
1139e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
1140e240928fSHong Zhang       prow = *ptaj++;
1141e240928fSHong Zhang       prow -= prstart; /* rm */
1142e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1143e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1144e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1145e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1146e240928fSHong Zhang       cnzi += nlnk;
1147e240928fSHong Zhang     }
1148e240928fSHong Zhang     /* rows of P_oth */
1149e240928fSHong Zhang     ptaj = ptasparserow_oth;
1150e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
1151e240928fSHong Zhang       prow = *ptaj++;
1152e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
1153e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1154e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1155e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1156e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1157e240928fSHong Zhang       cnzi += nlnk;
1158e240928fSHong Zhang     }
1159e240928fSHong Zhang 
1160e240928fSHong Zhang     /* If free space is not available, make more free space */
1161e240928fSHong Zhang     /* Double the amount of total space in the list */
1162e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
1163e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1164e240928fSHong Zhang     }
1165e240928fSHong Zhang 
1166e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
1167e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1168e240928fSHong Zhang     current_space->array           += cnzi;
1169e240928fSHong Zhang     current_space->local_used      += cnzi;
1170e240928fSHong Zhang     current_space->local_remaining -= cnzi;
1171e240928fSHong Zhang 
1172e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
1173e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1174e240928fSHong Zhang     }
1175e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
1176e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1177e240928fSHong Zhang     }
1178e240928fSHong Zhang 
1179e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1180e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
1181e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
1182e240928fSHong Zhang   }
1183e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1184e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
1185e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
1186e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1187e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1188e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1189e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1190e240928fSHong Zhang 
1191e240928fSHong Zhang   /* Allocate space for ca */
1192e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1193e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1194e240928fSHong Zhang 
1195e240928fSHong Zhang   /* put together the new matrix */
1196e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1197e240928fSHong Zhang 
1198e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1199e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
1200e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
1201e240928fSHong Zhang   c->freedata = PETSC_TRUE;
1202e240928fSHong Zhang   c->nonew    = 0;
1203e240928fSHong Zhang 
1204e240928fSHong Zhang   /* Clean up. */
1205e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1206a61c8c0fSHong Zhang 
1207e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1208e240928fSHong Zhang   PetscFunctionReturn(0);
1209e240928fSHong Zhang }
1210