xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 82afef89a5de20cc299e0d36137d907034be2a90)
1eb9c0419SKris Buschelman /*
29af31e4aSHong Zhang   Defines projective product routines where A is a AIJ matrix
3eb9c0419SKris Buschelman           C = P^T * A * P
4eb9c0419SKris Buschelman */
5eb9c0419SKris Buschelman 
6231952e2SKris Buschelman #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7eb9c0419SKris Buschelman #include "src/mat/utils/freespace.h"
89af31e4aSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
955a3bba9SHong Zhang #include "petscbt.h"
10eb9c0419SKris Buschelman 
11eb9c0419SKris Buschelman #undef __FUNCT__
129af31e4aSHong Zhang #define __FUNCT__ "MatPtAP"
134d3841fdSKris Buschelman /*@
149af31e4aSHong Zhang    MatPtAP - Creates the matrix projection C = P^T * A * P
154d3841fdSKris Buschelman 
164d3841fdSKris Buschelman    Collective on Mat
174d3841fdSKris Buschelman 
184d3841fdSKris Buschelman    Input Parameters:
194d3841fdSKris Buschelman +  A - the matrix
20f747e1aeSHong Zhang .  P - the projection matrix
21f747e1aeSHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22f747e1aeSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
234d3841fdSKris Buschelman 
244d3841fdSKris Buschelman    Output Parameters:
254d3841fdSKris Buschelman .  C - the product matrix
264d3841fdSKris Buschelman 
274d3841fdSKris Buschelman    Notes:
284d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
294d3841fdSKris Buschelman 
304d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
314d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
324d3841fdSKris Buschelman 
334d3841fdSKris Buschelman    Level: intermediate
344d3841fdSKris Buschelman 
359af31e4aSHong Zhang .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
364d3841fdSKris Buschelman @*/
3755a3bba9SHong Zhang PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
3855a3bba9SHong Zhang {
39dfbe8321SBarry Smith   PetscErrorCode ierr;
40534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42eb9c0419SKris Buschelman 
43eb9c0419SKris Buschelman   PetscFunctionBegin;
449af31e4aSHong Zhang   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
459af31e4aSHong Zhang   PetscValidType(A,1);
469af31e4aSHong Zhang   MatPreallocated(A);
479af31e4aSHong Zhang   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489af31e4aSHong Zhang   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499af31e4aSHong Zhang   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
509af31e4aSHong Zhang   PetscValidType(P,2);
519af31e4aSHong Zhang   MatPreallocated(P);
529af31e4aSHong Zhang   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
539af31e4aSHong Zhang   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
549af31e4aSHong Zhang   PetscValidPointer(C,3);
55534c1384SKris Buschelman 
569af31e4aSHong Zhang   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57eb9c0419SKris Buschelman 
589af31e4aSHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59eb9c0419SKris Buschelman 
60534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
61534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62534c1384SKris Buschelman   fA = A->ops->ptap;
63534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64534c1384SKris Buschelman   fP = P->ops->ptap;
65534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67534c1384SKris Buschelman 
689af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69534c1384SKris Buschelman   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
709af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71eb9c0419SKris Buschelman   PetscFunctionReturn(0);
72eb9c0419SKris Buschelman }
73eb9c0419SKris Buschelman 
74e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75e240928fSHong Zhang EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76e240928fSHong Zhang 
77a61c8c0fSHong Zhang EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78a61c8c0fSHong Zhang #undef __FUNCT__
79a61c8c0fSHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80a61c8c0fSHong Zhang PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81a61c8c0fSHong Zhang {
82a61c8c0fSHong Zhang   PetscErrorCode       ierr;
8332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
84a61c8c0fSHong Zhang   PetscObjectContainer container;
85a61c8c0fSHong Zhang 
86a61c8c0fSHong Zhang   PetscFunctionBegin;
87a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88a61c8c0fSHong Zhang   if (container) {
89a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
9032fba14fSHong Zhang     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
9132fba14fSHong Zhang     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
9201b7ae99SHong Zhang     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
9301b7ae99SHong Zhang     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94a61c8c0fSHong Zhang 
95a61c8c0fSHong Zhang     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
9632fba14fSHong Zhang     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97a61c8c0fSHong Zhang   }
98a61c8c0fSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
99a61c8c0fSHong Zhang 
100a61c8c0fSHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101a61c8c0fSHong Zhang   PetscFunctionReturn(0);
102a61c8c0fSHong Zhang }
103a61c8c0fSHong Zhang 
104eb9c0419SKris Buschelman #undef __FUNCT__
105ff134f7aSHong Zhang #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
106ff134f7aSHong Zhang PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
107ff134f7aSHong Zhang {
108ff134f7aSHong Zhang   PetscErrorCode       ierr;
10932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
110a61c8c0fSHong Zhang   PetscObjectContainer container;
111b90dcfe3SHong Zhang 
112b90dcfe3SHong Zhang   PetscFunctionBegin;
113b90dcfe3SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
1144c627768SHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
11532fba14fSHong Zhang     ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
11632fba14fSHong Zhang 
117e240928fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
11801b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
119a61c8c0fSHong Zhang 
120e240928fSHong Zhang     /* get P_loc by taking all local rows of P */
12132fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
122e240928fSHong Zhang 
123a61c8c0fSHong Zhang     /* attach the supporting struct to P for reuse */
124a61c8c0fSHong Zhang     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
125a61c8c0fSHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
126a61c8c0fSHong Zhang     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
127a61c8c0fSHong Zhang     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
128e240928fSHong Zhang 
12932fba14fSHong Zhang     /* now, compute symbolic local P^T*A*P */
130a61c8c0fSHong Zhang     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
131a61c8c0fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
132a61c8c0fSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
133a61c8c0fSHong Zhang     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
134a61c8c0fSHong Zhang     if (container) {
135a61c8c0fSHong Zhang       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
136a61c8c0fSHong Zhang     } else {
137a61c8c0fSHong Zhang       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
138a61c8c0fSHong Zhang     }
139a61c8c0fSHong Zhang 
140a61c8c0fSHong Zhang     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
14101b7ae99SHong Zhang     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
142a61c8c0fSHong Zhang 
143a61c8c0fSHong Zhang     /* get P_loc by taking all local rows of P */
14432fba14fSHong Zhang     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
14532fba14fSHong Zhang 
146a61c8c0fSHong Zhang   } else {
147a61c8c0fSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
148a61c8c0fSHong Zhang   }
14932fba14fSHong Zhang   /* now, compute numeric local P^T*A*P */
150a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
151a61c8c0fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
152a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
153a61c8c0fSHong Zhang 
154a61c8c0fSHong Zhang   PetscFunctionReturn(0);
155a61c8c0fSHong Zhang }
156a61c8c0fSHong Zhang 
157a61c8c0fSHong Zhang #undef __FUNCT__
158a61c8c0fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159a61c8c0fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160a61c8c0fSHong Zhang {
161a61c8c0fSHong Zhang   PetscErrorCode       ierr;
162a61c8c0fSHong Zhang   Mat                  C_seq;
16332fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
164a61c8c0fSHong Zhang   PetscObjectContainer container;
165a61c8c0fSHong Zhang 
166a61c8c0fSHong Zhang   PetscFunctionBegin;
167a61c8c0fSHong Zhang 
168a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169a61c8c0fSHong Zhang   if (container) {
170a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171a61c8c0fSHong Zhang   } else {
172a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173a61c8c0fSHong Zhang   }
17425616d81SHong Zhang   /* compute C_seq = P_loc^T * A_loc * P_seq */
17532fba14fSHong Zhang   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176a61c8c0fSHong Zhang 
177b90dcfe3SHong Zhang   /* add C_seq into mpi C */
178a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179b90dcfe3SHong Zhang 
180ff134f7aSHong Zhang   PetscFunctionReturn(0);
181ff134f7aSHong Zhang }
182ff134f7aSHong Zhang 
183ff134f7aSHong Zhang #undef __FUNCT__
184e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
185b90dcfe3SHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
186ff134f7aSHong Zhang {
187b90dcfe3SHong Zhang   PetscErrorCode       ierr;
188671beff6SHong Zhang   Mat_Merge_SeqsToMPI  *merge;
18932fba14fSHong Zhang   Mat_MatMatMultMPI    *ptap;
190a61c8c0fSHong Zhang   PetscObjectContainer cont_merge,cont_ptap;
191ff134f7aSHong Zhang 
192097ab81bSHong Zhang   PetscInt       flops=0;
193*82afef89SHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data;
194097ab81bSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
195*82afef89SHong Zhang   Mat_SeqAIJ     *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
196097ab81bSHong Zhang   Mat            C_seq;
197e658270aSHong Zhang   Mat_SeqAIJ     *p_loc,*p_oth;
198097ab81bSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
199097ab81bSHong Zhang   PetscInt       *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
200*82afef89SHong Zhang   PetscInt       i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
201097ab81bSHong Zhang   PetscInt       *cjj;
202097ab81bSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
203097ab81bSHong Zhang   MatScalar      *pa_loc,*pA,*pa_oth;
204*82afef89SHong Zhang   PetscInt       am=A->m,cN=C->N,cm=C->m;
205097ab81bSHong Zhang 
206097ab81bSHong Zhang   MPI_Comm             comm=C->comm;
207097ab81bSHong Zhang   PetscMPIInt          size,rank,taga,*len_s;
208097ab81bSHong Zhang   PetscInt             *owners;
209097ab81bSHong Zhang   PetscInt             proc;
210097ab81bSHong Zhang   PetscInt             **buf_ri,**buf_rj;
211*82afef89SHong Zhang   PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
212097ab81bSHong Zhang   PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
213097ab81bSHong Zhang   MPI_Request          *s_waits,*r_waits;
214097ab81bSHong Zhang   MPI_Status           *status;
215097ab81bSHong Zhang   MatScalar            **abuf_r,*ba_i;
216097ab81bSHong Zhang   Mat_SeqAIJ           *cseq;
217097ab81bSHong Zhang   PetscInt             *cseqi,*cseqj;
218*82afef89SHong Zhang   MatScalar            *ca;
219097ab81bSHong Zhang 
220ff134f7aSHong Zhang   PetscFunctionBegin;
221a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
222a61c8c0fSHong Zhang   if (cont_merge) {
223a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
2247f79fd58SMatthew Knepley   } else {
225a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
226671beff6SHong Zhang   }
227a61c8c0fSHong Zhang   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
228a61c8c0fSHong Zhang   if (cont_ptap) {
229a61c8c0fSHong Zhang     ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
230a61c8c0fSHong Zhang   } else {
231a61c8c0fSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
232a61c8c0fSHong Zhang   }
233097ab81bSHong Zhang #ifdef OLD
23432fba14fSHong Zhang   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
235097ab81bSHong Zhang #endif /* OLD */
236097ab81bSHong Zhang    /*--------------------------------------------------------------*/
237097ab81bSHong Zhang 
238097ab81bSHong Zhang   p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
239097ab81bSHong Zhang   p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
240097ab81bSHong Zhang   pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
241097ab81bSHong Zhang   pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;
242097ab81bSHong Zhang 
243097ab81bSHong Zhang   C_seq=merge->C_seq;
244097ab81bSHong Zhang   cseq=(Mat_SeqAIJ*)C_seq->data;
245097ab81bSHong Zhang   cseqi=cseq->i; cseqj=cseq->j;
246097ab81bSHong Zhang   cseqa=cseq->a;
247097ab81bSHong Zhang 
248*82afef89SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
249*82afef89SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250*82afef89SHong Zhang   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
251*82afef89SHong Zhang 
252097ab81bSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
253097ab81bSHong Zhang   ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
254097ab81bSHong Zhang   ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
255097ab81bSHong Zhang   apj      = (PetscInt *)(apa + cN);
256097ab81bSHong Zhang   apjdense = apj + cN;
257097ab81bSHong Zhang 
258*82afef89SHong Zhang   /* Allocate temporary MatScalar array for storage of one row of C */
259*82afef89SHong Zhang   ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
260*82afef89SHong Zhang   ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);
261*82afef89SHong Zhang 
262*82afef89SHong Zhang   /* Clear old values in C_Seq and C */
263097ab81bSHong Zhang   ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
264*82afef89SHong Zhang   ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
265*82afef89SHong Zhang   ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
266097ab81bSHong Zhang 
267097ab81bSHong Zhang   for (i=0;i<am;i++) {
268097ab81bSHong Zhang     /* Form i-th sparse row of A*P */
269097ab81bSHong Zhang      apnzj = 0;
270097ab81bSHong Zhang     /* diagonal portion of A */
271*82afef89SHong Zhang     nzi  = adi[i+1] - adi[i];
272*82afef89SHong Zhang     for (j=0;j<nzi;j++) {
273097ab81bSHong Zhang       prow = *adj;
274097ab81bSHong Zhang       adj++;
275097ab81bSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
276097ab81bSHong Zhang       pjj  = pj_loc + pi_loc[prow];
277097ab81bSHong Zhang       paj  = pa_loc + pi_loc[prow];
278097ab81bSHong Zhang       for (k=0;k<pnzj;k++) {
279097ab81bSHong Zhang         if (!apjdense[pjj[k]]) {
280097ab81bSHong Zhang           apjdense[pjj[k]] = -1;
281097ab81bSHong Zhang           apj[apnzj++]     = pjj[k];
282097ab81bSHong Zhang         }
283097ab81bSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
284097ab81bSHong Zhang       }
285097ab81bSHong Zhang       flops += 2*pnzj;
286097ab81bSHong Zhang       ada++;
287097ab81bSHong Zhang     }
288097ab81bSHong Zhang     /* off-diagonal portion of A */
289*82afef89SHong Zhang     nzi  = aoi[i+1] - aoi[i];
290*82afef89SHong Zhang     for (j=0;j<nzi;j++) {
291097ab81bSHong Zhang       col = a->garray[*aoj];
292097ab81bSHong Zhang       if (col < cstart){
293097ab81bSHong Zhang         prow = *aoj;
294097ab81bSHong Zhang       } else if (col >= cend){
295097ab81bSHong Zhang         prow = *aoj;
296097ab81bSHong Zhang       } else {
297097ab81bSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
298097ab81bSHong Zhang       }
299097ab81bSHong Zhang       aoj++;
300097ab81bSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
301097ab81bSHong Zhang       pjj  = pj_oth + pi_oth[prow];
302097ab81bSHong Zhang       paj  = pa_oth + pi_oth[prow];
303097ab81bSHong Zhang       for (k=0;k<pnzj;k++) {
304097ab81bSHong Zhang         if (!apjdense[pjj[k]]) {
305097ab81bSHong Zhang           apjdense[pjj[k]] = -1;
306097ab81bSHong Zhang           apj[apnzj++]     = pjj[k];
307097ab81bSHong Zhang         }
308097ab81bSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
309097ab81bSHong Zhang       }
310097ab81bSHong Zhang       flops += 2*pnzj;
311097ab81bSHong Zhang       aoa++;
312097ab81bSHong Zhang     }
313097ab81bSHong Zhang     /* Sort the j index array for quick sparse axpy. */
314097ab81bSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
315097ab81bSHong Zhang 
316097ab81bSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
317097ab81bSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
318097ab81bSHong Zhang     for (j=0;j<pnzi;j++) {
319097ab81bSHong Zhang       crow   = *pJ++;
320097ab81bSHong Zhang       cjj    = cseqj + cseqi[crow];
321097ab81bSHong Zhang       caj    = cseqa + cseqi[crow];
322097ab81bSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
323*82afef89SHong Zhang       if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */
324*82afef89SHong Zhang         for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]];
325*82afef89SHong Zhang         ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
326*82afef89SHong Zhang         ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
327*82afef89SHong Zhang       } else { /* add value into C_seq to be sent to other processors */
328*82afef89SHong Zhang         nextap = 0;
329097ab81bSHong Zhang         for (k=0;nextap<apnzj;k++) {
330097ab81bSHong Zhang           if (cjj[k]==apj[nextap]) {
331097ab81bSHong Zhang             caj[k] += (*pA)*apa[apj[nextap++]];
332097ab81bSHong Zhang           }
333097ab81bSHong Zhang         }
334*82afef89SHong Zhang       }
335097ab81bSHong Zhang       flops += 2*apnzj;
336097ab81bSHong Zhang       pA++;
337097ab81bSHong Zhang     }
338097ab81bSHong Zhang 
339097ab81bSHong Zhang     /* Zero the current row info for A*P */
340097ab81bSHong Zhang     for (j=0;j<apnzj;j++) {
341097ab81bSHong Zhang       apa[apj[j]]      = 0.;
342097ab81bSHong Zhang       apjdense[apj[j]] = 0;
343097ab81bSHong Zhang     }
344097ab81bSHong Zhang   }
345097ab81bSHong Zhang 
346097ab81bSHong Zhang   /* Assemble the final matrix and clean up */
347097ab81bSHong Zhang   ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348097ab81bSHong Zhang   ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
349097ab81bSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
350097ab81bSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
351097ab81bSHong Zhang 
352097ab81bSHong Zhang #ifdef OLD1
353a61c8c0fSHong Zhang   ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
354097ab81bSHong Zhang #endif /* OLD1 */
355097ab81bSHong Zhang   /*--------------------------------------------------------------*/
356*82afef89SHong Zhang   /* send and recv matrix values */
357*82afef89SHong Zhang   /*-----------------------------*/
358097ab81bSHong Zhang   bi     = merge->bi;
359097ab81bSHong Zhang   bj     = merge->bj;
360097ab81bSHong Zhang   buf_ri = merge->buf_ri;
361097ab81bSHong Zhang   buf_rj = merge->buf_rj;
362097ab81bSHong Zhang   len_s  = merge->len_s;
363097ab81bSHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
364097ab81bSHong Zhang   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
365097ab81bSHong Zhang 
366097ab81bSHong Zhang   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
367097ab81bSHong Zhang   for (proc=0,k=0; proc<size; proc++){
368097ab81bSHong Zhang     if (!len_s[proc]) continue;
369097ab81bSHong Zhang     i = owners[proc];
370097ab81bSHong Zhang     ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
371097ab81bSHong Zhang     k++;
372097ab81bSHong Zhang   }
373*82afef89SHong Zhang   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
374097ab81bSHong Zhang   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
375097ab81bSHong Zhang   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
376097ab81bSHong Zhang   ierr = PetscFree(status);CHKERRQ(ierr);
377097ab81bSHong Zhang 
378097ab81bSHong Zhang   ierr = PetscFree(s_waits);CHKERRQ(ierr);
379097ab81bSHong Zhang   ierr = PetscFree(r_waits);CHKERRQ(ierr);
380097ab81bSHong Zhang 
381097ab81bSHong Zhang   /* insert mat values of mpimat */
382097ab81bSHong Zhang   /*----------------------------*/
383097ab81bSHong Zhang   ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
384*82afef89SHong Zhang   ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
385097ab81bSHong Zhang   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
386097ab81bSHong Zhang   nextrow = buf_ri_k + merge->nrecv;
387097ab81bSHong Zhang   nextcseqi = nextrow + merge->nrecv;
388097ab81bSHong Zhang 
389097ab81bSHong Zhang   for (k=0; k<merge->nrecv; k++){
390097ab81bSHong Zhang     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
391097ab81bSHong Zhang     nrows = *(buf_ri_k[k]);
392097ab81bSHong Zhang     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
393097ab81bSHong Zhang     nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
394097ab81bSHong Zhang   }
395097ab81bSHong Zhang 
396*82afef89SHong Zhang   /* add received local vals to C */
397*82afef89SHong Zhang   for (i=0; i<cm; i++) {
398*82afef89SHong Zhang     crow = owners[rank] + i;
399097ab81bSHong Zhang     bj_i = bj+bi[i];  /* col indices of the i-th row of C */
400097ab81bSHong Zhang     bnzi = bi[i+1] - bi[i];
401*82afef89SHong Zhang     nzi = 0;
402097ab81bSHong Zhang     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
403097ab81bSHong Zhang       /* i-th row */
404097ab81bSHong Zhang       if (i == *nextrow[k]) {
405097ab81bSHong Zhang         cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
406097ab81bSHong Zhang         cseqj   = buf_rj[k] + *(nextcseqi[k]);
407097ab81bSHong Zhang         cseqa   = abuf_r[k] + *(nextcseqi[k]);
408097ab81bSHong Zhang         nextcseqj = 0;
409097ab81bSHong Zhang         for (j=0; nextcseqj<cseqnzi; j++){
410097ab81bSHong Zhang           if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
411097ab81bSHong Zhang             ba_i[j] += cseqa[nextcseqj++];
412*82afef89SHong Zhang             nzi++;
413097ab81bSHong Zhang           }
414097ab81bSHong Zhang         }
415097ab81bSHong Zhang         nextrow[k]++; nextcseqi[k]++;
416097ab81bSHong Zhang       }
417097ab81bSHong Zhang     }
418*82afef89SHong Zhang     if (nzi>0){
419*82afef89SHong Zhang       /* if (rank==1) printf(" [%d] set %d values on C, row: %d\n",rank,bnzi,crow); */
420*82afef89SHong Zhang       ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
421*82afef89SHong Zhang       ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
422*82afef89SHong Zhang     }
423097ab81bSHong Zhang   }
424097ab81bSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
425097ab81bSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
426097ab81bSHong Zhang 
427097ab81bSHong Zhang   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
428097ab81bSHong Zhang   ierr = PetscFree(ba_i);CHKERRQ(ierr);
429097ab81bSHong Zhang   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
430*82afef89SHong Zhang   ierr = PetscFree(ca);CHKERRQ(ierr);
431097ab81bSHong Zhang 
432ff134f7aSHong Zhang   PetscFunctionReturn(0);
433ff134f7aSHong Zhang }
434ff134f7aSHong Zhang 
435ff134f7aSHong Zhang #undef __FUNCT__
4369af31e4aSHong Zhang #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
437dfbe8321SBarry Smith PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
4389af31e4aSHong Zhang {
439dfbe8321SBarry Smith   PetscErrorCode ierr;
440b1d57f15SBarry Smith 
4419af31e4aSHong Zhang   PetscFunctionBegin;
4429af31e4aSHong Zhang   if (scall == MAT_INITIAL_MATRIX){
443d20bfe6fSHong Zhang     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
4449af31e4aSHong Zhang     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
445d20bfe6fSHong Zhang     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
4469af31e4aSHong Zhang   }
447d20bfe6fSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
4489af31e4aSHong Zhang   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
449d20bfe6fSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
4509af31e4aSHong Zhang   PetscFunctionReturn(0);
4519af31e4aSHong Zhang }
4529af31e4aSHong Zhang 
4536849ba73SBarry Smith /*
4549af31e4aSHong Zhang    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
4554d3841fdSKris Buschelman 
4564d3841fdSKris Buschelman    Collective on Mat
4574d3841fdSKris Buschelman 
4584d3841fdSKris Buschelman    Input Parameters:
4594d3841fdSKris Buschelman +  A - the matrix
4604d3841fdSKris Buschelman -  P - the projection matrix
4614d3841fdSKris Buschelman 
4624d3841fdSKris Buschelman    Output Parameters:
4634d3841fdSKris Buschelman .  C - the (i,j) structure of the product matrix
4644d3841fdSKris Buschelman 
4654d3841fdSKris Buschelman    Notes:
4664d3841fdSKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
4674d3841fdSKris Buschelman 
4684d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
4694d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
4709af31e4aSHong Zhang    this (i,j) structure by calling MatPtAPNumeric().
4714d3841fdSKris Buschelman 
4724d3841fdSKris Buschelman    Level: intermediate
4734d3841fdSKris Buschelman 
4749af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
4756849ba73SBarry Smith */
476e240928fSHong Zhang #undef __FUNCT__
477e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic"
47855a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
47955a3bba9SHong Zhang {
480dfbe8321SBarry Smith   PetscErrorCode ierr;
481534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
482534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
483eb9c0419SKris Buschelman 
484eb9c0419SKris Buschelman   PetscFunctionBegin;
485eb9c0419SKris Buschelman 
4864482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
487c9780b6fSBarry Smith   PetscValidType(A,1);
488eb9c0419SKris Buschelman   MatPreallocated(A);
489eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
490eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
491eb9c0419SKris Buschelman 
4924482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
493c9780b6fSBarry Smith   PetscValidType(P,2);
494eb9c0419SKris Buschelman   MatPreallocated(P);
495eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
496eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
497eb9c0419SKris Buschelman 
4984482741eSBarry Smith   PetscValidPointer(C,3);
4994482741eSBarry Smith 
500eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
501eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
502eb9c0419SKris Buschelman 
503534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
504534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
505534c1384SKris Buschelman   fA = A->ops->ptapsymbolic;
506534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
507534c1384SKris Buschelman   fP = P->ops->ptapsymbolic;
508534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
509534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
5104d3841fdSKris Buschelman 
511534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
512534c1384SKris Buschelman   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
513534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
514eb9c0419SKris Buschelman 
515eb9c0419SKris Buschelman   PetscFunctionReturn(0);
516eb9c0419SKris Buschelman }
517eb9c0419SKris Buschelman 
518f747e1aeSHong Zhang typedef struct {
519f747e1aeSHong Zhang   Mat    symAP;
520f747e1aeSHong Zhang } Mat_PtAPstruct;
521f747e1aeSHong Zhang 
52278a80504SBarry Smith EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
52378a80504SBarry Smith 
524f747e1aeSHong Zhang #undef __FUNCT__
525f747e1aeSHong Zhang #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
526f4a850bbSBarry Smith PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
527f747e1aeSHong Zhang {
528f4a850bbSBarry Smith   PetscErrorCode    ierr;
529f747e1aeSHong Zhang   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
530f747e1aeSHong Zhang 
531f747e1aeSHong Zhang   PetscFunctionBegin;
532f747e1aeSHong Zhang   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
533f747e1aeSHong Zhang   ierr = PetscFree(ptap);CHKERRQ(ierr);
53478a80504SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
535f747e1aeSHong Zhang   PetscFunctionReturn(0);
536f747e1aeSHong Zhang }
537f747e1aeSHong Zhang 
538eb9c0419SKris Buschelman #undef __FUNCT__
5399af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
54055a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
54155a3bba9SHong Zhang {
542dfbe8321SBarry Smith   PetscErrorCode ierr;
543d20bfe6fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
544d20bfe6fSHong Zhang   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
54555a3bba9SHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
546b8374ebeSBarry Smith   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
54755a3bba9SHong Zhang   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
548b8374ebeSBarry Smith   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
549d20bfe6fSHong Zhang   MatScalar      *ca;
55055a3bba9SHong Zhang   PetscBT        lnkbt;
551eb9c0419SKris Buschelman 
552eb9c0419SKris Buschelman   PetscFunctionBegin;
553d20bfe6fSHong Zhang   /* Get ij structure of P^T */
554eb9c0419SKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
555d20bfe6fSHong Zhang   ptJ=ptj;
556eb9c0419SKris Buschelman 
557d20bfe6fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
558d20bfe6fSHong Zhang   /* free space for accumulating nonzero column info */
55955a3bba9SHong Zhang   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
560d20bfe6fSHong Zhang   ci[0] = 0;
561eb9c0419SKris Buschelman 
56255a3bba9SHong Zhang   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
56355a3bba9SHong Zhang   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
564d20bfe6fSHong Zhang   ptasparserow = ptadenserow  + an;
56555a3bba9SHong Zhang 
56655a3bba9SHong Zhang   /* create and initialize a linked list */
56755a3bba9SHong Zhang   nlnk = pn+1;
56855a3bba9SHong Zhang   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
569eb9c0419SKris Buschelman 
570d20bfe6fSHong Zhang   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
571d20bfe6fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
572d20bfe6fSHong Zhang   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
573d20bfe6fSHong Zhang   current_space = free_space;
574d20bfe6fSHong Zhang 
575d20bfe6fSHong Zhang   /* Determine symbolic info for each row of C: */
576d20bfe6fSHong Zhang   for (i=0;i<pn;i++) {
577d20bfe6fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
578d20bfe6fSHong Zhang     ptanzi = 0;
579d20bfe6fSHong Zhang     /* Determine symbolic row of PtA: */
580d20bfe6fSHong Zhang     for (j=0;j<ptnzi;j++) {
581d20bfe6fSHong Zhang       arow = *ptJ++;
582d20bfe6fSHong Zhang       anzj = ai[arow+1] - ai[arow];
583d20bfe6fSHong Zhang       ajj  = aj + ai[arow];
584d20bfe6fSHong Zhang       for (k=0;k<anzj;k++) {
585d20bfe6fSHong Zhang         if (!ptadenserow[ajj[k]]) {
586d20bfe6fSHong Zhang           ptadenserow[ajj[k]]    = -1;
587d20bfe6fSHong Zhang           ptasparserow[ptanzi++] = ajj[k];
588d20bfe6fSHong Zhang         }
589d20bfe6fSHong Zhang       }
590d20bfe6fSHong Zhang     }
591d20bfe6fSHong Zhang       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
592d20bfe6fSHong Zhang     ptaj = ptasparserow;
593d20bfe6fSHong Zhang     cnzi   = 0;
594d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
595d20bfe6fSHong Zhang       prow = *ptaj++;
596d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
597d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
59855a3bba9SHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
59955a3bba9SHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
60055a3bba9SHong Zhang       cnzi += nlnk;
601d20bfe6fSHong Zhang     }
602d20bfe6fSHong Zhang 
603d20bfe6fSHong Zhang     /* If free space is not available, make more free space */
604d20bfe6fSHong Zhang     /* Double the amount of total space in the list */
605d20bfe6fSHong Zhang     if (current_space->local_remaining<cnzi) {
606d20bfe6fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
607d20bfe6fSHong Zhang     }
608d20bfe6fSHong Zhang 
609d20bfe6fSHong Zhang     /* Copy data into free space, and zero out denserows */
61055a3bba9SHong Zhang     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
611d20bfe6fSHong Zhang     current_space->array           += cnzi;
612d20bfe6fSHong Zhang     current_space->local_used      += cnzi;
613d20bfe6fSHong Zhang     current_space->local_remaining -= cnzi;
614d20bfe6fSHong Zhang 
615d20bfe6fSHong Zhang     for (j=0;j<ptanzi;j++) {
616d20bfe6fSHong Zhang       ptadenserow[ptasparserow[j]] = 0;
617d20bfe6fSHong Zhang     }
618d20bfe6fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
619d20bfe6fSHong Zhang     /*        For now, we will recompute what is needed. */
620d20bfe6fSHong Zhang     ci[i+1] = ci[i] + cnzi;
621d20bfe6fSHong Zhang   }
622d20bfe6fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
623d20bfe6fSHong Zhang   /* Allocate space for cj, initialize cj, and */
624d20bfe6fSHong Zhang   /* destroy list of free space and other temporary array(s) */
62555a3bba9SHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
626d20bfe6fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
627d20bfe6fSHong Zhang   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
62855a3bba9SHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
629d20bfe6fSHong Zhang 
630d20bfe6fSHong Zhang   /* Allocate space for ca */
631d20bfe6fSHong Zhang   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
632d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
633d20bfe6fSHong Zhang 
634d20bfe6fSHong Zhang   /* put together the new matrix */
635d20bfe6fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
636d20bfe6fSHong Zhang 
637d20bfe6fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
638d20bfe6fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
639d20bfe6fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
640d20bfe6fSHong Zhang   c->freedata = PETSC_TRUE;
641d20bfe6fSHong Zhang   c->nonew    = 0;
642d20bfe6fSHong Zhang 
643d20bfe6fSHong Zhang   /* Clean up. */
644d20bfe6fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
645eb9c0419SKris Buschelman 
646eb9c0419SKris Buschelman   PetscFunctionReturn(0);
647eb9c0419SKris Buschelman }
648eb9c0419SKris Buschelman 
6493985e5eaSKris Buschelman #include "src/mat/impls/maij/maij.h"
6503985e5eaSKris Buschelman EXTERN_C_BEGIN
6513985e5eaSKris Buschelman #undef __FUNCT__
6529af31e4aSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
65355a3bba9SHong Zhang PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
65455a3bba9SHong Zhang {
6555c66b693SKris Buschelman   /* This routine requires testing -- I don't think it works. */
656dfbe8321SBarry Smith   PetscErrorCode ierr;
6573985e5eaSKris Buschelman   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
6583985e5eaSKris Buschelman   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
6593985e5eaSKris Buschelman   Mat            P=pp->AIJ;
6603985e5eaSKris Buschelman   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
661b1d57f15SBarry Smith   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
662b1d57f15SBarry Smith   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
663b1d57f15SBarry Smith   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
664b1d57f15SBarry Smith   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
6653985e5eaSKris Buschelman   MatScalar      *ca;
6663985e5eaSKris Buschelman 
6673985e5eaSKris Buschelman   PetscFunctionBegin;
6683985e5eaSKris Buschelman   /* Start timer */
6699af31e4aSHong Zhang   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
6703985e5eaSKris Buschelman 
6713985e5eaSKris Buschelman   /* Get ij structure of P^T */
6723985e5eaSKris Buschelman   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
6733985e5eaSKris Buschelman 
6743985e5eaSKris Buschelman   /* Allocate ci array, arrays for fill computation and */
6753985e5eaSKris Buschelman   /* free space for accumulating nonzero column info */
676b1d57f15SBarry Smith   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
6773985e5eaSKris Buschelman   ci[0] = 0;
6783985e5eaSKris Buschelman 
679b1d57f15SBarry Smith   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
680b1d57f15SBarry Smith   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
6813985e5eaSKris Buschelman   ptasparserow = ptadenserow  + an;
6823985e5eaSKris Buschelman   denserow     = ptasparserow + an;
6833985e5eaSKris Buschelman   sparserow    = denserow     + pn;
6843985e5eaSKris Buschelman 
6853985e5eaSKris Buschelman   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
6863985e5eaSKris Buschelman   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
6873985e5eaSKris Buschelman   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
6883985e5eaSKris Buschelman   current_space = free_space;
6893985e5eaSKris Buschelman 
6903985e5eaSKris Buschelman   /* Determine symbolic info for each row of C: */
6913985e5eaSKris Buschelman   for (i=0;i<pn/ppdof;i++) {
6923985e5eaSKris Buschelman     ptnzi  = pti[i+1] - pti[i];
6933985e5eaSKris Buschelman     ptanzi = 0;
6943985e5eaSKris Buschelman     ptJ    = ptj + pti[i];
6953985e5eaSKris Buschelman     for (dof=0;dof<ppdof;dof++) {
6963985e5eaSKris Buschelman     /* Determine symbolic row of PtA: */
6973985e5eaSKris Buschelman       for (j=0;j<ptnzi;j++) {
6983985e5eaSKris Buschelman         arow = ptJ[j] + dof;
6993985e5eaSKris Buschelman         anzj = ai[arow+1] - ai[arow];
7003985e5eaSKris Buschelman         ajj  = aj + ai[arow];
7013985e5eaSKris Buschelman         for (k=0;k<anzj;k++) {
7023985e5eaSKris Buschelman           if (!ptadenserow[ajj[k]]) {
7033985e5eaSKris Buschelman             ptadenserow[ajj[k]]    = -1;
7043985e5eaSKris Buschelman             ptasparserow[ptanzi++] = ajj[k];
7053985e5eaSKris Buschelman           }
7063985e5eaSKris Buschelman         }
7073985e5eaSKris Buschelman       }
7083985e5eaSKris Buschelman       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
7093985e5eaSKris Buschelman       ptaj = ptasparserow;
7103985e5eaSKris Buschelman       cnzi   = 0;
7113985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
712fe05a634SKris Buschelman         pdof = *ptaj%dof;
7133985e5eaSKris Buschelman         prow = (*ptaj++)/dof;
7143985e5eaSKris Buschelman         pnzj = pi[prow+1] - pi[prow];
7153985e5eaSKris Buschelman         pjj  = pj + pi[prow];
7163985e5eaSKris Buschelman         for (k=0;k<pnzj;k++) {
717fe05a634SKris Buschelman           if (!denserow[pjj[k]+pdof]) {
718fe05a634SKris Buschelman             denserow[pjj[k]+pdof] = -1;
719fe05a634SKris Buschelman             sparserow[cnzi++]     = pjj[k]+pdof;
7203985e5eaSKris Buschelman           }
7213985e5eaSKris Buschelman         }
7223985e5eaSKris Buschelman       }
7233985e5eaSKris Buschelman 
7243985e5eaSKris Buschelman       /* sort sparserow */
7253985e5eaSKris Buschelman       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
7263985e5eaSKris Buschelman 
7273985e5eaSKris Buschelman       /* If free space is not available, make more free space */
7283985e5eaSKris Buschelman       /* Double the amount of total space in the list */
7293985e5eaSKris Buschelman       if (current_space->local_remaining<cnzi) {
7303985e5eaSKris Buschelman         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
7313985e5eaSKris Buschelman       }
7323985e5eaSKris Buschelman 
7333985e5eaSKris Buschelman       /* Copy data into free space, and zero out denserows */
734b1d57f15SBarry Smith       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
7353985e5eaSKris Buschelman       current_space->array           += cnzi;
7363985e5eaSKris Buschelman       current_space->local_used      += cnzi;
7373985e5eaSKris Buschelman       current_space->local_remaining -= cnzi;
7383985e5eaSKris Buschelman 
7393985e5eaSKris Buschelman       for (j=0;j<ptanzi;j++) {
7403985e5eaSKris Buschelman         ptadenserow[ptasparserow[j]] = 0;
7413985e5eaSKris Buschelman       }
7423985e5eaSKris Buschelman       for (j=0;j<cnzi;j++) {
7433985e5eaSKris Buschelman         denserow[sparserow[j]] = 0;
7443985e5eaSKris Buschelman       }
7453985e5eaSKris Buschelman       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
7463985e5eaSKris Buschelman       /*        For now, we will recompute what is needed. */
7473985e5eaSKris Buschelman       ci[i+1+dof] = ci[i+dof] + cnzi;
7483985e5eaSKris Buschelman     }
7493985e5eaSKris Buschelman   }
7503985e5eaSKris Buschelman   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
7513985e5eaSKris Buschelman   /* Allocate space for cj, initialize cj, and */
7523985e5eaSKris Buschelman   /* destroy list of free space and other temporary array(s) */
753b1d57f15SBarry Smith   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
7543985e5eaSKris Buschelman   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
7553985e5eaSKris Buschelman   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
7563985e5eaSKris Buschelman 
7573985e5eaSKris Buschelman   /* Allocate space for ca */
7583985e5eaSKris Buschelman   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
7593985e5eaSKris Buschelman   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
7603985e5eaSKris Buschelman 
7613985e5eaSKris Buschelman   /* put together the new matrix */
7623985e5eaSKris Buschelman   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
7633985e5eaSKris Buschelman 
7643985e5eaSKris Buschelman   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
7653985e5eaSKris Buschelman   /* Since these are PETSc arrays, change flags to free them as necessary. */
7663985e5eaSKris Buschelman   c = (Mat_SeqAIJ *)((*C)->data);
7673985e5eaSKris Buschelman   c->freedata = PETSC_TRUE;
7683985e5eaSKris Buschelman   c->nonew    = 0;
7693985e5eaSKris Buschelman 
7703985e5eaSKris Buschelman   /* Clean up. */
7713985e5eaSKris Buschelman   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
7723985e5eaSKris Buschelman 
7739af31e4aSHong Zhang   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
7743985e5eaSKris Buschelman   PetscFunctionReturn(0);
7753985e5eaSKris Buschelman }
7763985e5eaSKris Buschelman EXTERN_C_END
7773985e5eaSKris Buschelman 
7786849ba73SBarry Smith /*
7799af31e4aSHong Zhang    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
7804d3841fdSKris Buschelman 
7814d3841fdSKris Buschelman    Collective on Mat
7824d3841fdSKris Buschelman 
7834d3841fdSKris Buschelman    Input Parameters:
7844d3841fdSKris Buschelman +  A - the matrix
7854d3841fdSKris Buschelman -  P - the projection matrix
7864d3841fdSKris Buschelman 
7874d3841fdSKris Buschelman    Output Parameters:
7884d3841fdSKris Buschelman .  C - the product matrix
7894d3841fdSKris Buschelman 
7904d3841fdSKris Buschelman    Notes:
7919af31e4aSHong Zhang    C must have been created by calling MatPtAPSymbolic and must be destroyed by
7924d3841fdSKris Buschelman    the user using MatDeatroy().
7934d3841fdSKris Buschelman 
794170ef064SHong Zhang    This routine is currently only implemented for pairs of AIJ matrices and classes
795170ef064SHong Zhang    which inherit from AIJ.  C will be of type MATAIJ.
7964d3841fdSKris Buschelman 
7974d3841fdSKris Buschelman    Level: intermediate
7984d3841fdSKris Buschelman 
7999af31e4aSHong Zhang .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
8006849ba73SBarry Smith */
801e240928fSHong Zhang #undef __FUNCT__
802e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric"
80355a3bba9SHong Zhang PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
80455a3bba9SHong Zhang {
805dfbe8321SBarry Smith   PetscErrorCode ierr;
806534c1384SKris Buschelman   PetscErrorCode (*fA)(Mat,Mat,Mat);
807534c1384SKris Buschelman   PetscErrorCode (*fP)(Mat,Mat,Mat);
808eb9c0419SKris Buschelman 
809eb9c0419SKris Buschelman   PetscFunctionBegin;
810eb9c0419SKris Buschelman 
8114482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
812c9780b6fSBarry Smith   PetscValidType(A,1);
813eb9c0419SKris Buschelman   MatPreallocated(A);
814eb9c0419SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
815eb9c0419SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
816eb9c0419SKris Buschelman 
8174482741eSBarry Smith   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
818c9780b6fSBarry Smith   PetscValidType(P,2);
819eb9c0419SKris Buschelman   MatPreallocated(P);
820eb9c0419SKris Buschelman   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
821eb9c0419SKris Buschelman   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
822eb9c0419SKris Buschelman 
8234482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
824c9780b6fSBarry Smith   PetscValidType(C,3);
825eb9c0419SKris Buschelman   MatPreallocated(C);
826eb9c0419SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
827eb9c0419SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
828eb9c0419SKris Buschelman 
829eb9c0419SKris Buschelman   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
830eb9c0419SKris Buschelman   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
831eb9c0419SKris Buschelman   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
832eb9c0419SKris Buschelman   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
833eb9c0419SKris Buschelman 
834534c1384SKris Buschelman   /* For now, we do not dispatch based on the type of A and P */
835534c1384SKris Buschelman   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
836534c1384SKris Buschelman   fA = A->ops->ptapnumeric;
837534c1384SKris Buschelman   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
838534c1384SKris Buschelman   fP = P->ops->ptapnumeric;
839534c1384SKris Buschelman   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
840534c1384SKris Buschelman   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
8414d3841fdSKris Buschelman 
842534c1384SKris Buschelman   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
843534c1384SKris Buschelman   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
844534c1384SKris Buschelman   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
845eb9c0419SKris Buschelman 
846eb9c0419SKris Buschelman   PetscFunctionReturn(0);
847eb9c0419SKris Buschelman }
848eb9c0419SKris Buschelman 
849eb9c0419SKris Buschelman #undef __FUNCT__
8509af31e4aSHong Zhang #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
851dfbe8321SBarry Smith PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
852dfbe8321SBarry Smith {
853dfbe8321SBarry Smith   PetscErrorCode ierr;
854b1d57f15SBarry Smith   PetscInt       flops=0;
855d20bfe6fSHong Zhang   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
856d20bfe6fSHong Zhang   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
857d20bfe6fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
858b1d57f15SBarry Smith   PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
859b1d57f15SBarry Smith   PetscInt       *ci=c->i,*cj=c->j,*cjj;
860b1d57f15SBarry Smith   PetscInt       am=A->M,cn=C->N,cm=C->M;
861b1d57f15SBarry Smith   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
862d20bfe6fSHong Zhang   MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
863eb9c0419SKris Buschelman 
864eb9c0419SKris Buschelman   PetscFunctionBegin;
865d20bfe6fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
866b1d57f15SBarry Smith   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
867b1d57f15SBarry Smith   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
868eb9c0419SKris Buschelman 
869b1d57f15SBarry Smith   apj      = (PetscInt *)(apa + cn);
870d20bfe6fSHong Zhang   apjdense = apj + cn;
871d20bfe6fSHong Zhang 
872d20bfe6fSHong Zhang   /* Clear old values in C */
873d20bfe6fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
874d20bfe6fSHong Zhang 
875d20bfe6fSHong Zhang   for (i=0;i<am;i++) {
876d20bfe6fSHong Zhang     /* Form sparse row of A*P */
877d20bfe6fSHong Zhang     anzi  = ai[i+1] - ai[i];
878d20bfe6fSHong Zhang     apnzj = 0;
879d20bfe6fSHong Zhang     for (j=0;j<anzi;j++) {
880d20bfe6fSHong Zhang       prow = *aj++;
881d20bfe6fSHong Zhang       pnzj = pi[prow+1] - pi[prow];
882d20bfe6fSHong Zhang       pjj  = pj + pi[prow];
883d20bfe6fSHong Zhang       paj  = pa + pi[prow];
884d20bfe6fSHong Zhang       for (k=0;k<pnzj;k++) {
885d20bfe6fSHong Zhang         if (!apjdense[pjj[k]]) {
886d20bfe6fSHong Zhang           apjdense[pjj[k]] = -1;
887d20bfe6fSHong Zhang           apj[apnzj++]     = pjj[k];
888d20bfe6fSHong Zhang         }
889d20bfe6fSHong Zhang         apa[pjj[k]] += (*aa)*paj[k];
890d20bfe6fSHong Zhang       }
891d20bfe6fSHong Zhang       flops += 2*pnzj;
892d20bfe6fSHong Zhang       aa++;
893d20bfe6fSHong Zhang     }
894d20bfe6fSHong Zhang 
895d20bfe6fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
896d20bfe6fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
897d20bfe6fSHong Zhang 
898d20bfe6fSHong Zhang     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
899d20bfe6fSHong Zhang     pnzi = pi[i+1] - pi[i];
900d20bfe6fSHong Zhang     for (j=0;j<pnzi;j++) {
901d20bfe6fSHong Zhang       nextap = 0;
902d20bfe6fSHong Zhang       crow   = *pJ++;
903d20bfe6fSHong Zhang       cjj    = cj + ci[crow];
904d20bfe6fSHong Zhang       caj    = ca + ci[crow];
905d20bfe6fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
906d20bfe6fSHong Zhang       for (k=0;nextap<apnzj;k++) {
907d20bfe6fSHong Zhang         if (cjj[k]==apj[nextap]) {
908d20bfe6fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
909d20bfe6fSHong Zhang         }
910d20bfe6fSHong Zhang       }
911d20bfe6fSHong Zhang       flops += 2*apnzj;
912d20bfe6fSHong Zhang       pA++;
913d20bfe6fSHong Zhang     }
914d20bfe6fSHong Zhang 
915d20bfe6fSHong Zhang     /* Zero the current row info for A*P */
916d20bfe6fSHong Zhang     for (j=0;j<apnzj;j++) {
917d20bfe6fSHong Zhang       apa[apj[j]]      = 0.;
918d20bfe6fSHong Zhang       apjdense[apj[j]] = 0;
919d20bfe6fSHong Zhang     }
920d20bfe6fSHong Zhang   }
921d20bfe6fSHong Zhang 
922d20bfe6fSHong Zhang   /* Assemble the final matrix and clean up */
923d20bfe6fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
924d20bfe6fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
925d20bfe6fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
926d20bfe6fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
927d20bfe6fSHong Zhang 
928eb9c0419SKris Buschelman   PetscFunctionReturn(0);
929eb9c0419SKris Buschelman }
9300e36024fSHong Zhang 
931a61c8c0fSHong Zhang /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
932e240928fSHong Zhang static PetscEvent logkey_matptapnumeric_local = 0;
933e240928fSHong Zhang #undef __FUNCT__
934e240928fSHong Zhang #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
935e240928fSHong Zhang PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
936e240928fSHong Zhang {
937e240928fSHong Zhang   PetscErrorCode ierr;
938e240928fSHong Zhang   PetscInt       flops=0;
939e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
940e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
941e240928fSHong Zhang   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
942e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
943e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
944e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
945e240928fSHong Zhang   PetscInt       *pJ=pj_loc,*pjj;
946e240928fSHong Zhang   PetscInt       *ci=c->i,*cj=c->j,*cjj;
947e240928fSHong Zhang   PetscInt       am=A->m,cn=C->N,cm=C->M;
948e240928fSHong Zhang   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
949e240928fSHong Zhang   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
950e240928fSHong Zhang   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
951e240928fSHong Zhang 
952e240928fSHong Zhang   PetscFunctionBegin;
953e240928fSHong Zhang   if (!logkey_matptapnumeric_local) {
954a61c8c0fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
955e240928fSHong Zhang   }
956a61c8c0fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
957e240928fSHong Zhang 
958e240928fSHong Zhang   /* Allocate temporary array for storage of one row of A*P */
959e240928fSHong Zhang   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
960e240928fSHong Zhang   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
961e240928fSHong Zhang   apj      = (PetscInt *)(apa + cn);
962e240928fSHong Zhang   apjdense = apj + cn;
963e240928fSHong Zhang 
964e240928fSHong Zhang   /* Clear old values in C */
965e240928fSHong Zhang   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
966e240928fSHong Zhang 
967e240928fSHong Zhang   for (i=0;i<am;i++) {
968e240928fSHong Zhang     /* Form i-th sparse row of A*P */
969e240928fSHong Zhang      apnzj = 0;
970e240928fSHong Zhang     /* diagonal portion of A */
971e240928fSHong Zhang     anzi  = adi[i+1] - adi[i];
972e240928fSHong Zhang     for (j=0;j<anzi;j++) {
973e240928fSHong Zhang       prow = *adj;
974e240928fSHong Zhang       adj++;
975e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
976e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
977e240928fSHong Zhang       paj  = pa_loc + pi_loc[prow];
978e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
979e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
980e240928fSHong Zhang           apjdense[pjj[k]] = -1;
981e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
982e240928fSHong Zhang         }
983e240928fSHong Zhang         apa[pjj[k]] += (*ada)*paj[k];
984e240928fSHong Zhang       }
985e240928fSHong Zhang       flops += 2*pnzj;
986e240928fSHong Zhang       ada++;
987e240928fSHong Zhang     }
988e240928fSHong Zhang     /* off-diagonal portion of A */
989e240928fSHong Zhang     anzi  = aoi[i+1] - aoi[i];
990e240928fSHong Zhang     for (j=0;j<anzi;j++) {
991e240928fSHong Zhang       col = a->garray[*aoj];
992e240928fSHong Zhang       if (col < cstart){
993e240928fSHong Zhang         prow = *aoj;
994e240928fSHong Zhang       } else if (col >= cend){
995e240928fSHong Zhang         prow = *aoj;
996e240928fSHong Zhang       } else {
997e240928fSHong Zhang         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
998e240928fSHong Zhang       }
999e240928fSHong Zhang       aoj++;
1000e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1001e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1002e240928fSHong Zhang       paj  = pa_oth + pi_oth[prow];
1003e240928fSHong Zhang       for (k=0;k<pnzj;k++) {
1004e240928fSHong Zhang         if (!apjdense[pjj[k]]) {
1005e240928fSHong Zhang           apjdense[pjj[k]] = -1;
1006e240928fSHong Zhang           apj[apnzj++]     = pjj[k];
1007e240928fSHong Zhang         }
1008e240928fSHong Zhang         apa[pjj[k]] += (*aoa)*paj[k];
1009e240928fSHong Zhang       }
1010e240928fSHong Zhang       flops += 2*pnzj;
1011e240928fSHong Zhang       aoa++;
1012e240928fSHong Zhang     }
1013e240928fSHong Zhang     /* Sort the j index array for quick sparse axpy. */
1014e240928fSHong Zhang     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1015e240928fSHong Zhang 
1016e240928fSHong Zhang     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1017e240928fSHong Zhang     pnzi = pi_loc[i+1] - pi_loc[i];
1018e240928fSHong Zhang     for (j=0;j<pnzi;j++) {
1019e240928fSHong Zhang       nextap = 0;
1020e240928fSHong Zhang       crow   = *pJ++;
1021e240928fSHong Zhang       cjj    = cj + ci[crow];
1022e240928fSHong Zhang       caj    = ca + ci[crow];
1023e240928fSHong Zhang       /* Perform sparse axpy operation.  Note cjj includes apj. */
1024e240928fSHong Zhang       for (k=0;nextap<apnzj;k++) {
1025e240928fSHong Zhang         if (cjj[k]==apj[nextap]) {
1026e240928fSHong Zhang           caj[k] += (*pA)*apa[apj[nextap++]];
1027e240928fSHong Zhang         }
1028e240928fSHong Zhang       }
1029e240928fSHong Zhang       flops += 2*apnzj;
1030e240928fSHong Zhang       pA++;
1031e240928fSHong Zhang     }
1032e240928fSHong Zhang 
1033e240928fSHong Zhang     /* Zero the current row info for A*P */
1034e240928fSHong Zhang     for (j=0;j<apnzj;j++) {
1035e240928fSHong Zhang       apa[apj[j]]      = 0.;
1036e240928fSHong Zhang       apjdense[apj[j]] = 0;
1037e240928fSHong Zhang     }
1038e240928fSHong Zhang   }
1039e240928fSHong Zhang 
1040e240928fSHong Zhang   /* Assemble the final matrix and clean up */
1041e240928fSHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042e240928fSHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043e240928fSHong Zhang   ierr = PetscFree(apa);CHKERRQ(ierr);
1044e240928fSHong Zhang   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1045a61c8c0fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
1046e240928fSHong Zhang   PetscFunctionReturn(0);
1047e240928fSHong Zhang }
1048e240928fSHong Zhang static PetscEvent logkey_matptapsymbolic_local = 0;
1049e240928fSHong Zhang #undef __FUNCT__
1050e240928fSHong Zhang #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1051e240928fSHong Zhang PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1052e240928fSHong Zhang {
1053e240928fSHong Zhang   PetscErrorCode ierr;
1054e240928fSHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1055e240928fSHong Zhang   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1056e240928fSHong Zhang   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1057e240928fSHong Zhang   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1058e240928fSHong Zhang   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1059e240928fSHong Zhang   PetscInt       *ci,*cj,*ptaj;
10606abd8857SHong Zhang   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1061e240928fSHong Zhang   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1062e240928fSHong Zhang   PetscInt       pm=P_loc->m,nlnk,*lnk;
1063e240928fSHong Zhang   MatScalar      *ca;
1064e240928fSHong Zhang   PetscBT        lnkbt;
1065e240928fSHong Zhang   PetscInt       prend,nprow_loc,nprow_oth;
1066e240928fSHong Zhang   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1067e240928fSHong Zhang   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1068e240928fSHong Zhang   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1069e240928fSHong Zhang 
1070e240928fSHong Zhang   PetscFunctionBegin;
1071e240928fSHong Zhang   if (!logkey_matptapsymbolic_local) {
1072e240928fSHong Zhang     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1073e240928fSHong Zhang   }
1074e240928fSHong Zhang   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1075e240928fSHong Zhang 
1076e240928fSHong Zhang   prend = prstart + pm;
1077e240928fSHong Zhang 
1078e240928fSHong Zhang   /* get ij structure of P_loc^T */
1079e240928fSHong Zhang   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1080e240928fSHong Zhang   ptJ=ptj;
1081e240928fSHong Zhang 
1082e240928fSHong Zhang   /* Allocate ci array, arrays for fill computation and */
1083e240928fSHong Zhang   /* free space for accumulating nonzero column info */
1084e240928fSHong Zhang   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1085e240928fSHong Zhang   ci[0] = 0;
1086e240928fSHong Zhang 
10876abd8857SHong Zhang   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
10886abd8857SHong Zhang   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
10896abd8857SHong Zhang   ptasparserow_loc = ptadenserow_loc + aN;
10906abd8857SHong Zhang   ptadenserow_oth  = ptasparserow_loc + aN;
10916abd8857SHong Zhang   ptasparserow_oth = ptadenserow_oth + aN;
1092e240928fSHong Zhang 
1093e240928fSHong Zhang   /* create and initialize a linked list */
1094e240928fSHong Zhang   nlnk = pN+1;
1095e240928fSHong Zhang   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1096e240928fSHong Zhang 
10976abd8857SHong Zhang   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1098e240928fSHong Zhang   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1099e240928fSHong Zhang   nnz           = adi[am] + aoi[am];
11006abd8857SHong Zhang   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1101e240928fSHong Zhang   current_space = free_space;
1102e240928fSHong Zhang 
1103e240928fSHong Zhang   /* determine symbolic info for each row of C: */
1104e240928fSHong Zhang   for (i=0;i<pN;i++) {
1105e240928fSHong Zhang     ptnzi  = pti[i+1] - pti[i];
1106e240928fSHong Zhang     nprow_loc = 0; nprow_oth = 0;
1107e240928fSHong Zhang     /* i-th row of symbolic P_loc^T*A_loc: */
1108e240928fSHong Zhang     for (j=0;j<ptnzi;j++) {
1109e240928fSHong Zhang       arow = *ptJ++;
1110e240928fSHong Zhang       /* diagonal portion of A */
1111e240928fSHong Zhang       anzj = adi[arow+1] - adi[arow];
1112e240928fSHong Zhang       ajj  = adj + adi[arow];
1113e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1114e240928fSHong Zhang         col = ajj[k]+prstart;
1115e240928fSHong Zhang         if (!ptadenserow_loc[col]) {
1116e240928fSHong Zhang           ptadenserow_loc[col]    = -1;
1117e240928fSHong Zhang           ptasparserow_loc[nprow_loc++] = col;
1118e240928fSHong Zhang         }
1119e240928fSHong Zhang       }
1120e240928fSHong Zhang       /* off-diagonal portion of A */
1121e240928fSHong Zhang       anzj = aoi[arow+1] - aoi[arow];
1122e240928fSHong Zhang       ajj  = aoj + aoi[arow];
1123e240928fSHong Zhang       for (k=0;k<anzj;k++) {
1124e240928fSHong Zhang         col = a->garray[ajj[k]];  /* global col */
1125e240928fSHong Zhang         if (col < cstart){
1126e240928fSHong Zhang           col = ajj[k];
1127e240928fSHong Zhang         } else if (col >= cend){
1128e240928fSHong Zhang           col = ajj[k] + pm;
1129e240928fSHong Zhang         } else {
1130e240928fSHong Zhang           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1131e240928fSHong Zhang         }
1132e240928fSHong Zhang         if (!ptadenserow_oth[col]) {
1133e240928fSHong Zhang           ptadenserow_oth[col]    = -1;
1134e240928fSHong Zhang           ptasparserow_oth[nprow_oth++] = col;
1135e240928fSHong Zhang         }
1136e240928fSHong Zhang       }
1137e240928fSHong Zhang     }
1138e240928fSHong Zhang 
1139e240928fSHong Zhang     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1140e240928fSHong Zhang     cnzi   = 0;
1141e240928fSHong Zhang     /* rows of P_loc */
1142e240928fSHong Zhang     ptaj = ptasparserow_loc;
1143e240928fSHong Zhang     for (j=0; j<nprow_loc; j++) {
1144e240928fSHong Zhang       prow = *ptaj++;
1145e240928fSHong Zhang       prow -= prstart; /* rm */
1146e240928fSHong Zhang       pnzj = pi_loc[prow+1] - pi_loc[prow];
1147e240928fSHong Zhang       pjj  = pj_loc + pi_loc[prow];
1148e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1149e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1150e240928fSHong Zhang       cnzi += nlnk;
1151e240928fSHong Zhang     }
1152e240928fSHong Zhang     /* rows of P_oth */
1153e240928fSHong Zhang     ptaj = ptasparserow_oth;
1154e240928fSHong Zhang     for (j=0; j<nprow_oth; j++) {
1155e240928fSHong Zhang       prow = *ptaj++;
1156e240928fSHong Zhang       if (prow >= prend) prow -= pm; /* rm */
1157e240928fSHong Zhang       pnzj = pi_oth[prow+1] - pi_oth[prow];
1158e240928fSHong Zhang       pjj  = pj_oth + pi_oth[prow];
1159e240928fSHong Zhang       /* add non-zero cols of P into the sorted linked list lnk */
1160e240928fSHong Zhang       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1161e240928fSHong Zhang       cnzi += nlnk;
1162e240928fSHong Zhang     }
1163e240928fSHong Zhang 
1164e240928fSHong Zhang     /* If free space is not available, make more free space */
1165e240928fSHong Zhang     /* Double the amount of total space in the list */
1166e240928fSHong Zhang     if (current_space->local_remaining<cnzi) {
1167e240928fSHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1168e240928fSHong Zhang     }
1169e240928fSHong Zhang 
1170e240928fSHong Zhang     /* Copy data into free space, and zero out denserows */
1171e240928fSHong Zhang     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1172e240928fSHong Zhang     current_space->array           += cnzi;
1173e240928fSHong Zhang     current_space->local_used      += cnzi;
1174e240928fSHong Zhang     current_space->local_remaining -= cnzi;
1175e240928fSHong Zhang 
1176e240928fSHong Zhang     for (j=0;j<nprow_loc; j++) {
1177e240928fSHong Zhang       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1178e240928fSHong Zhang     }
1179e240928fSHong Zhang     for (j=0;j<nprow_oth; j++) {
1180e240928fSHong Zhang       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1181e240928fSHong Zhang     }
1182e240928fSHong Zhang 
1183e240928fSHong Zhang     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1184e240928fSHong Zhang     /*        For now, we will recompute what is needed. */
1185e240928fSHong Zhang     ci[i+1] = ci[i] + cnzi;
1186e240928fSHong Zhang   }
1187e240928fSHong Zhang   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1188e240928fSHong Zhang   /* Allocate space for cj, initialize cj, and */
1189e240928fSHong Zhang   /* destroy list of free space and other temporary array(s) */
1190e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1191e240928fSHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1192e240928fSHong Zhang   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1193e240928fSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1194e240928fSHong Zhang 
1195e240928fSHong Zhang   /* Allocate space for ca */
1196e240928fSHong Zhang   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1197e240928fSHong Zhang   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1198e240928fSHong Zhang 
1199e240928fSHong Zhang   /* put together the new matrix */
1200e240928fSHong Zhang   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1201e240928fSHong Zhang 
1202e240928fSHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1203e240928fSHong Zhang   /* Since these are PETSc arrays, change flags to free them as necessary. */
1204e240928fSHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
1205e240928fSHong Zhang   c->freedata = PETSC_TRUE;
1206e240928fSHong Zhang   c->nonew    = 0;
1207e240928fSHong Zhang 
1208e240928fSHong Zhang   /* Clean up. */
1209e240928fSHong Zhang   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1210a61c8c0fSHong Zhang 
1211e240928fSHong Zhang   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1212e240928fSHong Zhang   PetscFunctionReturn(0);
1213e240928fSHong Zhang }
1214