xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 77431f27ef6a796c710ee53da3b02f01a7247aeb)
1 /*
  Defines projective product routines where A is an AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94 
95     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
96     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97   }
98   ierr = PetscFree(ptap);CHKERRQ(ierr);
99 
100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP_MPIAIJ_MPIAIJ - Parallel (MPIAIJ) driver for C = P^T * A * P.

   scall == MAT_INITIAL_MATRIX:
     allocates a Mat_MatMatMultMPI support structure, fills it with the
     off-process rows of P (B_oth) and the local rows of P (B_loc), attaches
     it to P in a container keyed "MatPtAPMPI", installs
     MatDestroy_MPIAIJ_PtAP so the structure is freed together with P, and
     runs the symbolic phase (which also fills numeric values).
   scall == MAT_REUSE_MATRIX:
     retrieves the previously attached support structure and refreshes its
     B_oth/B_loc pieces for the new values of A and P.
   In both paths the numeric product is then (re)computed into *C.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;       /* support data cached on P between calls */
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse; the replacement destroy
       routine releases it when P is destroyed */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160 {
161   PetscErrorCode       ierr;
162   Mat                  C_seq;
163   Mat_MatMatMultMPI    *ptap;
164   PetscObjectContainer container;
165 
166   PetscFunctionBegin;
167 
168   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169   if (container) {
170     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171   } else {
172     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173   }
174   /* compute C_seq = P_loc^T * A_loc * P_seq */
175   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176 
177   /* add C_seq into mpi C */
178   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179 
180   PetscFunctionReturn(0);
181 }
182 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
/*
   MatPtAPNumeric_MPIAIJ_MPIAIJ - Numeric phase of the parallel C = P^T*A*P.

   Requires two previously attached support structures:
   - "MatMergeSeqsToMPI" on C (merge info from the symbolic phase), and
   - "MatPtAPMPI" on P (local/off-process pieces of P, see
     MatPtAP_MPIAIJ_MPIAIJ()).

   Algorithm: for each local row i of A, accumulate the sparse row AP[i,:]
   of A*P (diagonal part of A against P_loc, off-diagonal part against
   P_oth), then scatter P_loc[i,:]^T * AP[i,:]: locally owned rows of C get
   MatSetValues() directly; rows owned by other processes are accumulated in
   the sequential scratch matrix C_seq, whose value arrays are later sent to
   the owning processes and merged into C.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat            C_seq;
  Mat_SeqAIJ     *p_loc,*p_oth;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt       i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt       *cjj;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar      *pa_loc,*pA,*pa_oth;
  PetscInt       am=A->m,cN=C->N,cm=C->m;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  Mat_SeqAIJ           *cseq;
  PetscInt             *cseqi,*cseqj;
  MatScalar            *ca;

  PetscFunctionBegin;
  /* retrieve the merge info attached to C by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  /* retrieve the P support structure attached by MatPtAP_MPIAIJ_MPIAIJ() */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }
#ifdef OLD
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
#endif /* OLD */
   /*--------------------------------------------------------------*/

  /* raw CSR arrays of the local and off-process pieces of P */
  p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  /* sequential scratch matrix holding contributions destined for other processes */
  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  /* single allocation split three ways: apa (values), apj (col indices), apjdense (marker) */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_Seq and C */
  /* NOTE(review): indexing cseqi[cN] assumes C_seq has exactly cN rows -- confirm */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    nzi  = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj;    /* local column of A = local row of P_loc */
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        /* record newly seen column; apjdense marks membership in the row */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    nzi  = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      /* map local off-diagonal column to its global index; it must lie
         outside this process's diagonal block [cstart,cend) */
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      crow   = *pJ++;   /* destination row of C (global numbering) */
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */
        for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]];
        ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
        ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      } else { /* add value into C_seq to be sent to other processors */
        /* merge of two sorted index lists: advance through cjj, matching apj */
        nextap = 0;
        for (k=0;nextap<apnzj;k++) {
          if (cjj[k]==apj[nextap]) {
            caj[k] += (*pA)*apa[apj[nextap++]];
          }
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

#ifdef OLD1
  ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
#endif /* OLD1 */
  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* send each destination process the C_seq values for the rows it owns */
  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  /* one allocation split into buf_ri_k / nextrow / nextcseqi cursor arrays */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;   /* global row of C owned by this process */
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        /* merge the received sorted row into the full row pattern bj_i */
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
436 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
437 {
438   PetscErrorCode ierr;
439 
440   PetscFunctionBegin;
441   if (scall == MAT_INITIAL_MATRIX){
442     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
443     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
444     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
445   }
446   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
447   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
448   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 /*
453    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
454 
455    Collective on Mat
456 
457    Input Parameters:
458 +  A - the matrix
459 -  P - the projection matrix
460 
461    Output Parameters:
462 .  C - the (i,j) structure of the product matrix
463 
464    Notes:
465    C will be created and must be destroyed by the user with MatDestroy().
466 
467    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
468    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
469    this (i,j) structure by calling MatPtAPNumeric().
470 
471    Level: intermediate
472 
473 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
474 */
475 #undef __FUNCT__
476 #define __FUNCT__ "MatPtAPSymbolic"
477 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
478 {
479   PetscErrorCode ierr;
480   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
481   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
482 
483   PetscFunctionBegin;
484 
485   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
486   PetscValidType(A,1);
487   MatPreallocated(A);
488   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
489   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
490 
491   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
492   PetscValidType(P,2);
493   MatPreallocated(P);
494   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
495   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
496 
497   PetscValidPointer(C,3);
498 
499   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
500   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
501 
502   /* For now, we do not dispatch based on the type of A and P */
503   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
504   fA = A->ops->ptapsymbolic;
505   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
506   fP = P->ops->ptapsymbolic;
507   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
508   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
509 
510   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
511   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
512   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
516 
/* Per-matrix support data for sequential PtAP, stored in Mat->spptr and
   released by MatDestroy_SeqAIJ_PtAP(). */
typedef struct {
  Mat    symAP;   /* saved symbolic A*P product; destroyed with the matrix */
} Mat_PtAPstruct;
520 
521 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
525 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
526 {
527   PetscErrorCode    ierr;
528   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
529 
530   PetscFunctionBegin;
531   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
532   ierr = PetscFree(ptap);CHKERRQ(ierr);
533   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
539 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
540 {
541   PetscErrorCode ierr;
542   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
543   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
544   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
545   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
546   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
547   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
548   MatScalar      *ca;
549   PetscBT        lnkbt;
550 
551   PetscFunctionBegin;
552   /* Get ij structure of P^T */
553   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
554   ptJ=ptj;
555 
556   /* Allocate ci array, arrays for fill computation and */
557   /* free space for accumulating nonzero column info */
558   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
559   ci[0] = 0;
560 
561   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
562   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
563   ptasparserow = ptadenserow  + an;
564 
565   /* create and initialize a linked list */
566   nlnk = pn+1;
567   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
568 
569   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
570   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
571   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
572   current_space = free_space;
573 
574   /* Determine symbolic info for each row of C: */
575   for (i=0;i<pn;i++) {
576     ptnzi  = pti[i+1] - pti[i];
577     ptanzi = 0;
578     /* Determine symbolic row of PtA: */
579     for (j=0;j<ptnzi;j++) {
580       arow = *ptJ++;
581       anzj = ai[arow+1] - ai[arow];
582       ajj  = aj + ai[arow];
583       for (k=0;k<anzj;k++) {
584         if (!ptadenserow[ajj[k]]) {
585           ptadenserow[ajj[k]]    = -1;
586           ptasparserow[ptanzi++] = ajj[k];
587         }
588       }
589     }
590       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
591     ptaj = ptasparserow;
592     cnzi   = 0;
593     for (j=0;j<ptanzi;j++) {
594       prow = *ptaj++;
595       pnzj = pi[prow+1] - pi[prow];
596       pjj  = pj + pi[prow];
597       /* add non-zero cols of P into the sorted linked list lnk */
598       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
599       cnzi += nlnk;
600     }
601 
602     /* If free space is not available, make more free space */
603     /* Double the amount of total space in the list */
604     if (current_space->local_remaining<cnzi) {
605       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
606     }
607 
608     /* Copy data into free space, and zero out denserows */
609     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
610     current_space->array           += cnzi;
611     current_space->local_used      += cnzi;
612     current_space->local_remaining -= cnzi;
613 
614     for (j=0;j<ptanzi;j++) {
615       ptadenserow[ptasparserow[j]] = 0;
616     }
617     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
618     /*        For now, we will recompute what is needed. */
619     ci[i+1] = ci[i] + cnzi;
620   }
621   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
622   /* Allocate space for cj, initialize cj, and */
623   /* destroy list of free space and other temporary array(s) */
624   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
625   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
626   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
627   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
628 
629   /* Allocate space for ca */
630   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
631   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
632 
633   /* put together the new matrix */
634   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
635 
636   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
637   /* Since these are PETSc arrays, change flags to free them as necessary. */
638   c = (Mat_SeqAIJ *)((*C)->data);
639   c->freedata = PETSC_TRUE;
640   c->nonew    = 0;
641 
642   /* Clean up. */
643   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
644 
645   PetscFunctionReturn(0);
646 }
647 
648 #include "src/mat/impls/maij/maij.h"
649 EXTERN_C_BEGIN
650 #undef __FUNCT__
651 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
652 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
653 {
654   /* This routine requires testing -- I don't think it works. */
655   PetscErrorCode ierr;
656   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
657   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
658   Mat            P=pp->AIJ;
659   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
660   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
661   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
662   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
663   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
664   MatScalar      *ca;
665 
666   PetscFunctionBegin;
667   /* Start timer */
668   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
669 
670   /* Get ij structure of P^T */
671   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
672 
673   /* Allocate ci array, arrays for fill computation and */
674   /* free space for accumulating nonzero column info */
675   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
676   ci[0] = 0;
677 
678   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
679   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
680   ptasparserow = ptadenserow  + an;
681   denserow     = ptasparserow + an;
682   sparserow    = denserow     + pn;
683 
684   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
685   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
686   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
687   current_space = free_space;
688 
689   /* Determine symbolic info for each row of C: */
690   for (i=0;i<pn/ppdof;i++) {
691     ptnzi  = pti[i+1] - pti[i];
692     ptanzi = 0;
693     ptJ    = ptj + pti[i];
694     for (dof=0;dof<ppdof;dof++) {
695     /* Determine symbolic row of PtA: */
696       for (j=0;j<ptnzi;j++) {
697         arow = ptJ[j] + dof;
698         anzj = ai[arow+1] - ai[arow];
699         ajj  = aj + ai[arow];
700         for (k=0;k<anzj;k++) {
701           if (!ptadenserow[ajj[k]]) {
702             ptadenserow[ajj[k]]    = -1;
703             ptasparserow[ptanzi++] = ajj[k];
704           }
705         }
706       }
707       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
708       ptaj = ptasparserow;
709       cnzi   = 0;
710       for (j=0;j<ptanzi;j++) {
711         pdof = *ptaj%dof;
712         prow = (*ptaj++)/dof;
713         pnzj = pi[prow+1] - pi[prow];
714         pjj  = pj + pi[prow];
715         for (k=0;k<pnzj;k++) {
716           if (!denserow[pjj[k]+pdof]) {
717             denserow[pjj[k]+pdof] = -1;
718             sparserow[cnzi++]     = pjj[k]+pdof;
719           }
720         }
721       }
722 
723       /* sort sparserow */
724       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
725 
726       /* If free space is not available, make more free space */
727       /* Double the amount of total space in the list */
728       if (current_space->local_remaining<cnzi) {
729         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
730       }
731 
732       /* Copy data into free space, and zero out denserows */
733       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
734       current_space->array           += cnzi;
735       current_space->local_used      += cnzi;
736       current_space->local_remaining -= cnzi;
737 
738       for (j=0;j<ptanzi;j++) {
739         ptadenserow[ptasparserow[j]] = 0;
740       }
741       for (j=0;j<cnzi;j++) {
742         denserow[sparserow[j]] = 0;
743       }
744       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
745       /*        For now, we will recompute what is needed. */
746       ci[i+1+dof] = ci[i+dof] + cnzi;
747     }
748   }
749   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
750   /* Allocate space for cj, initialize cj, and */
751   /* destroy list of free space and other temporary array(s) */
752   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
753   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
754   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
755 
756   /* Allocate space for ca */
757   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
758   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
759 
760   /* put together the new matrix */
761   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
762 
763   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
764   /* Since these are PETSc arrays, change flags to free them as necessary. */
765   c = (Mat_SeqAIJ *)((*C)->data);
766   c->freedata = PETSC_TRUE;
767   c->nonew    = 0;
768 
769   /* Clean up. */
770   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
771 
772   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
773   PetscFunctionReturn(0);
774 }
775 EXTERN_C_END
776 
777 /*
778    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
779 
780    Collective on Mat
781 
782    Input Parameters:
783 +  A - the matrix
784 -  P - the projection matrix
785 
786    Output Parameters:
787 .  C - the product matrix
788 
789    Notes:
790    C must have been created by calling MatPtAPSymbolic and must be destroyed by
791    the user using MatDestroy().
792 
793    This routine is currently only implemented for pairs of AIJ matrices and classes
794    which inherit from AIJ.  C will be of type MATAIJ.
795 
796    Level: intermediate
797 
798 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
799 */
800 #undef __FUNCT__
801 #define __FUNCT__ "MatPtAPNumeric"
802 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
803 {
804   PetscErrorCode ierr;
805   PetscErrorCode (*fA)(Mat,Mat,Mat);
806   PetscErrorCode (*fP)(Mat,Mat,Mat);
807 
808   PetscFunctionBegin;
809 
810   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
811   PetscValidType(A,1);
812   MatPreallocated(A);
813   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
814   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
815 
816   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
817   PetscValidType(P,2);
818   MatPreallocated(P);
819   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
820   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
821 
822   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
823   PetscValidType(C,3);
824   MatPreallocated(C);
825   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
826   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
827 
828   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
829   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
830   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
831   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);
832 
833   /* For now, we do not dispatch based on the type of A and P */
834   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
835   fA = A->ops->ptapnumeric;
836   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
837   fP = P->ops->ptapnumeric;
838   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
839   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
840 
841   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
842   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
843   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
844 
845   PetscFunctionReturn(0);
846 }
847 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric phase of C = P^T * A * P for SeqAIJ matrices.  C's nonzero
   structure (c->i/c->j) is assumed to have been built by the symbolic
   phase; the sparse-axpy below relies on every column of A*P appearing
   in the corresponding row of C.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj/aa and pJ/pA are walked incrementally (aj++, aa++, pJ++, pA++) across
     the whole i-loop, so their advancement order must not be disturbed */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  /* single allocation carved into three arrays: apa (cn scalars, dense values),
     apj (cn ints, sparse column list), apjdense (cn ints, occupancy flags) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P: accumulate A[i,prow]*P[prow,:] into the
       dense accumulator apa, recording new columns in apj */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;  /* mark column as seen in this row */
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;  /* advance to the next stored entry of A */
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    /* Each nonzero P[i,crow] scatters the row of A*P into row crow of C. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* merge-style scan: k runs over C's row, nextap over the (sorted) A*P row;
         terminates because every apj entry is present in cjj (symbolic phase) */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;  /* advance to the next stored entry of P */
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
929 
930 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
931 static PetscEvent logkey_matptapnumeric_local = 0;
932 #undef __FUNCT__
933 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
934 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
935 {
936   PetscErrorCode ierr;
937   PetscInt       flops=0;
938   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
939   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
940   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
941   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
942   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
943   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
944   PetscInt       *pJ=pj_loc,*pjj;
945   PetscInt       *ci=c->i,*cj=c->j,*cjj;
946   PetscInt       am=A->m,cn=C->N,cm=C->M;
947   PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
948   MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
949   MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;
950 
951   PetscFunctionBegin;
952   if (!logkey_matptapnumeric_local) {
953     ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
954   }
955   ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
956 
957   /* Allocate temporary array for storage of one row of A*P */
958   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
959   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
960   apj      = (PetscInt *)(apa + cn);
961   apjdense = apj + cn;
962 
963   /* Clear old values in C */
964   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
965 
966   for (i=0;i<am;i++) {
967     /* Form i-th sparse row of A*P */
968      apnzj = 0;
969     /* diagonal portion of A */
970     anzi  = adi[i+1] - adi[i];
971     for (j=0;j<anzi;j++) {
972       prow = *adj;
973       adj++;
974       pnzj = pi_loc[prow+1] - pi_loc[prow];
975       pjj  = pj_loc + pi_loc[prow];
976       paj  = pa_loc + pi_loc[prow];
977       for (k=0;k<pnzj;k++) {
978         if (!apjdense[pjj[k]]) {
979           apjdense[pjj[k]] = -1;
980           apj[apnzj++]     = pjj[k];
981         }
982         apa[pjj[k]] += (*ada)*paj[k];
983       }
984       flops += 2*pnzj;
985       ada++;
986     }
987     /* off-diagonal portion of A */
988     anzi  = aoi[i+1] - aoi[i];
989     for (j=0;j<anzi;j++) {
990       col = a->garray[*aoj];
991       if (col < cstart){
992         prow = *aoj;
993       } else if (col >= cend){
994         prow = *aoj;
995       } else {
996         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
997       }
998       aoj++;
999       pnzj = pi_oth[prow+1] - pi_oth[prow];
1000       pjj  = pj_oth + pi_oth[prow];
1001       paj  = pa_oth + pi_oth[prow];
1002       for (k=0;k<pnzj;k++) {
1003         if (!apjdense[pjj[k]]) {
1004           apjdense[pjj[k]] = -1;
1005           apj[apnzj++]     = pjj[k];
1006         }
1007         apa[pjj[k]] += (*aoa)*paj[k];
1008       }
1009       flops += 2*pnzj;
1010       aoa++;
1011     }
1012     /* Sort the j index array for quick sparse axpy. */
1013     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
1014 
1015     /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
1016     pnzi = pi_loc[i+1] - pi_loc[i];
1017     for (j=0;j<pnzi;j++) {
1018       nextap = 0;
1019       crow   = *pJ++;
1020       cjj    = cj + ci[crow];
1021       caj    = ca + ci[crow];
1022       /* Perform sparse axpy operation.  Note cjj includes apj. */
1023       for (k=0;nextap<apnzj;k++) {
1024         if (cjj[k]==apj[nextap]) {
1025           caj[k] += (*pA)*apa[apj[nextap++]];
1026         }
1027       }
1028       flops += 2*apnzj;
1029       pA++;
1030     }
1031 
1032     /* Zero the current row info for A*P */
1033     for (j=0;j<apnzj;j++) {
1034       apa[apj[j]]      = 0.;
1035       apjdense[apj[j]] = 0;
1036     }
1037   }
1038 
1039   /* Assemble the final matrix and clean up */
1040   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042   ierr = PetscFree(apa);CHKERRQ(ierr);
1043   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
1044   ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 static PetscEvent logkey_matptapsymbolic_local = 0;
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1050 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1051 {
1052   PetscErrorCode ierr;
1053   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1054   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1055   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1056   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1057   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1058   PetscInt       *ci,*cj,*ptaj;
1059   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1060   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1061   PetscInt       pm=P_loc->m,nlnk,*lnk;
1062   MatScalar      *ca;
1063   PetscBT        lnkbt;
1064   PetscInt       prend,nprow_loc,nprow_oth;
1065   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1066   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1067   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1068 
1069   PetscFunctionBegin;
1070   if (!logkey_matptapsymbolic_local) {
1071     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1072   }
1073   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1074 
1075   prend = prstart + pm;
1076 
1077   /* get ij structure of P_loc^T */
1078   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1079   ptJ=ptj;
1080 
1081   /* Allocate ci array, arrays for fill computation and */
1082   /* free space for accumulating nonzero column info */
1083   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1084   ci[0] = 0;
1085 
1086   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1087   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
1088   ptasparserow_loc = ptadenserow_loc + aN;
1089   ptadenserow_oth  = ptasparserow_loc + aN;
1090   ptasparserow_oth = ptadenserow_oth + aN;
1091 
1092   /* create and initialize a linked list */
1093   nlnk = pN+1;
1094   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1095 
1096   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1097   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1098   nnz           = adi[am] + aoi[am];
1099   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1100   current_space = free_space;
1101 
1102   /* determine symbolic info for each row of C: */
1103   for (i=0;i<pN;i++) {
1104     ptnzi  = pti[i+1] - pti[i];
1105     nprow_loc = 0; nprow_oth = 0;
1106     /* i-th row of symbolic P_loc^T*A_loc: */
1107     for (j=0;j<ptnzi;j++) {
1108       arow = *ptJ++;
1109       /* diagonal portion of A */
1110       anzj = adi[arow+1] - adi[arow];
1111       ajj  = adj + adi[arow];
1112       for (k=0;k<anzj;k++) {
1113         col = ajj[k]+prstart;
1114         if (!ptadenserow_loc[col]) {
1115           ptadenserow_loc[col]    = -1;
1116           ptasparserow_loc[nprow_loc++] = col;
1117         }
1118       }
1119       /* off-diagonal portion of A */
1120       anzj = aoi[arow+1] - aoi[arow];
1121       ajj  = aoj + aoi[arow];
1122       for (k=0;k<anzj;k++) {
1123         col = a->garray[ajj[k]];  /* global col */
1124         if (col < cstart){
1125           col = ajj[k];
1126         } else if (col >= cend){
1127           col = ajj[k] + pm;
1128         } else {
1129           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1130         }
1131         if (!ptadenserow_oth[col]) {
1132           ptadenserow_oth[col]    = -1;
1133           ptasparserow_oth[nprow_oth++] = col;
1134         }
1135       }
1136     }
1137 
1138     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1139     cnzi   = 0;
1140     /* rows of P_loc */
1141     ptaj = ptasparserow_loc;
1142     for (j=0; j<nprow_loc; j++) {
1143       prow = *ptaj++;
1144       prow -= prstart; /* rm */
1145       pnzj = pi_loc[prow+1] - pi_loc[prow];
1146       pjj  = pj_loc + pi_loc[prow];
1147       /* add non-zero cols of P into the sorted linked list lnk */
1148       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1149       cnzi += nlnk;
1150     }
1151     /* rows of P_oth */
1152     ptaj = ptasparserow_oth;
1153     for (j=0; j<nprow_oth; j++) {
1154       prow = *ptaj++;
1155       if (prow >= prend) prow -= pm; /* rm */
1156       pnzj = pi_oth[prow+1] - pi_oth[prow];
1157       pjj  = pj_oth + pi_oth[prow];
1158       /* add non-zero cols of P into the sorted linked list lnk */
1159       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1160       cnzi += nlnk;
1161     }
1162 
1163     /* If free space is not available, make more free space */
1164     /* Double the amount of total space in the list */
1165     if (current_space->local_remaining<cnzi) {
1166       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1167     }
1168 
1169     /* Copy data into free space, and zero out denserows */
1170     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1171     current_space->array           += cnzi;
1172     current_space->local_used      += cnzi;
1173     current_space->local_remaining -= cnzi;
1174 
1175     for (j=0;j<nprow_loc; j++) {
1176       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1177     }
1178     for (j=0;j<nprow_oth; j++) {
1179       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1180     }
1181 
1182     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1183     /*        For now, we will recompute what is needed. */
1184     ci[i+1] = ci[i] + cnzi;
1185   }
1186   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1187   /* Allocate space for cj, initialize cj, and */
1188   /* destroy list of free space and other temporary array(s) */
1189   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1190   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1191   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1192   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1193 
1194   /* Allocate space for ca */
1195   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1196   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1197 
1198   /* put together the new matrix */
1199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1200 
1201   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1202   /* Since these are PETSc arrays, change flags to free them as necessary. */
1203   c = (Mat_SeqAIJ *)((*C)->data);
1204   c->freedata = PETSC_TRUE;
1205   c->nonew    = 0;
1206 
1207   /* Clean up. */
1208   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1209 
1210   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213