xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision a5b70e5f0544f8d30f9f28fc789f01303c8fc079)
1 /*
  Defines projective product routines where A is an AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94 
95     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
96     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97   }
98   ierr = PetscFree(ptap);CHKERRQ(ierr);
99 
100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP for a pair of MPIAIJ matrices.

   On MAT_INITIAL_MATRIX: gathers the needed rows of P (local rows plus the
   rows matching nonzero off-processor columns of A), caches them in a
   Mat_MatMatMultMPI struct attached to P via a "MatPtAPMPI" container for
   later reuse, and runs the symbolic phase.  On MAT_REUSE_MATRIX: retrieves
   the cached struct and refreshes only the off-processor rows of P.
   Both paths finish with the numeric phase.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;      /* cached pieces of P shared by symbolic/numeric phases */
  PetscObjectContainer container;  /* wrapper used to attach ptap to P */

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    /* NOTE(review): P's destroy is redirected so the cached data is freed with P */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    /* a previous MAT_INITIAL_MATRIX call must have attached the support struct */
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
155 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
156 {
157   PetscErrorCode       ierr;
158   Mat                  C_seq;
159   Mat_MatMatMultMPI    *ptap;
160   PetscObjectContainer container;
161 
162   PetscFunctionBegin;
163 
164   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
165   if (container) {
166     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
167   } else {
168     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
169   }
170   /* compute C_seq = P_loc^T * A_loc * P_seq */
171   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
172 
173   /* add C_seq into mpi C */
174   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
175 
176   PetscFunctionReturn(0);
177 }
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
/*
   Numeric phase of the parallel C = P^T * A * P.

   Per local row i of A it forms the sparse row AP[i,:] = A[i,:]*P (using the
   diagonal and off-diagonal parts of A and the cached local/off-processor
   pieces of P), then scatters P[:,i]^T * AP[i,:] either directly into the
   distributed C (rows owned locally) or into the sequential C_seq whose
   values are later sent to their owning processes and merged into C.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;     /* merge info attached to C by the symbolic phase */
  Mat_MatMatMultMPI    *ptap;      /* cached pieces of P attached by MatPtAP_MPIAIJ_MPIAIJ */
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,
                       *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *cseq,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend; /* cstart/cend appear unused below */
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i,*ca;
  PetscInt             *cseqi,*cseqj,col;


  PetscFunctionBegin;
  /* retrieve the merge context attached to C by MatMerge_SeqsToMPISymbolic */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  /* retrieve the cached pieces of P attached by MatPtAP_MPIAIJ_MPIAIJ */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }

  /* CSR arrays of the off-processor rows of P */
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  /* CSR arrays of the sequential product kept in the merge context */
  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  /* single allocation holding apa (values), apj (col indices), apjdense (flags) */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_Seq and C */
  /* NOTE(review): cseqi is indexed by cN (=C->N) here — assumes C_seq has cN rows; confirm against the symbolic phase */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    nzi  = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;  /* adj/ada walk the diagonal part of A across the whole i-loop */
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;          /* first touch: record column in sparse list */
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A: rows of P fetched from other processes */
    nzi  = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P: target rows are owned locally, add straight into C */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      crow = (*pJ_d++) + owners[rank];  /* global row of C */
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P: target rows are remote, accumulate into C_seq */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      crow   = p->garray[*pJ_o++];
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      /* merge the sorted apj into the (sorted) column pattern of C_seq's row */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* send each destination process its contiguous slice of C_seq values */
  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  /* one cursor triple per received message: i-structure base, next row, next row's offsets */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        /* merge the received sorted row into the local pattern bj_i */
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      /* if (rank==1) printf(" [%d] set %d values on C, row: %d\n",rank,bnzi,crow); */
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
439 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
440 {
441   PetscErrorCode ierr;
442 
443   PetscFunctionBegin;
444   if (scall == MAT_INITIAL_MATRIX){
445     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
446     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
447     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
448   }
449   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
450   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
451   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 /*
456    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
457 
458    Collective on Mat
459 
460    Input Parameters:
461 +  A - the matrix
462 -  P - the projection matrix
463 
464    Output Parameters:
465 .  C - the (i,j) structure of the product matrix
466 
467    Notes:
468    C will be created and must be destroyed by the user with MatDestroy().
469 
470    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
471    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
472    this (i,j) structure by calling MatPtAPNumeric().
473 
474    Level: intermediate
475 
476 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
477 */
478 #undef __FUNCT__
479 #define __FUNCT__ "MatPtAPSymbolic"
480 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
481 {
482   PetscErrorCode ierr;
483   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
484   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
485 
486   PetscFunctionBegin;
487 
488   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
489   PetscValidType(A,1);
490   MatPreallocated(A);
491   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
492   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
493 
494   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
495   PetscValidType(P,2);
496   MatPreallocated(P);
497   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
498   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
499 
500   PetscValidPointer(C,3);
501 
502   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
503   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
504 
505   /* For now, we do not dispatch based on the type of A and P */
506   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
507   fA = A->ops->ptapsymbolic;
508   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
509   fP = P->ops->ptapsymbolic;
510   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
511   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
512 
513   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
514   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
515   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
516 
517   PetscFunctionReturn(0);
518 }
519 
/* Support context attached (via Mat->spptr) to a SeqAIJ matrix involved in PtAP;
   freed by MatDestroy_SeqAIJ_PtAP below. */
typedef struct {
  Mat    symAP;   /* cached symbolic A*P — presumably reused by the numeric phase; not populated anywhere in this file, TODO confirm */
} Mat_PtAPstruct;
523 
524 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
528 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
529 {
530   PetscErrorCode    ierr;
531   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
532 
533   PetscFunctionBegin;
534   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
535   ierr = PetscFree(ptap);CHKERRQ(ierr);
536   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
542 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
543 {
544   PetscErrorCode ierr;
545   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
546   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
547   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
548   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
549   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
550   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
551   MatScalar      *ca;
552   PetscBT        lnkbt;
553 
554   PetscFunctionBegin;
555   /* Get ij structure of P^T */
556   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
557   ptJ=ptj;
558 
559   /* Allocate ci array, arrays for fill computation and */
560   /* free space for accumulating nonzero column info */
561   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
562   ci[0] = 0;
563 
564   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
565   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
566   ptasparserow = ptadenserow  + an;
567 
568   /* create and initialize a linked list */
569   nlnk = pn+1;
570   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
571 
572   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
573   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
574   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
575   current_space = free_space;
576 
577   /* Determine symbolic info for each row of C: */
578   for (i=0;i<pn;i++) {
579     ptnzi  = pti[i+1] - pti[i];
580     ptanzi = 0;
581     /* Determine symbolic row of PtA: */
582     for (j=0;j<ptnzi;j++) {
583       arow = *ptJ++;
584       anzj = ai[arow+1] - ai[arow];
585       ajj  = aj + ai[arow];
586       for (k=0;k<anzj;k++) {
587         if (!ptadenserow[ajj[k]]) {
588           ptadenserow[ajj[k]]    = -1;
589           ptasparserow[ptanzi++] = ajj[k];
590         }
591       }
592     }
593       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
594     ptaj = ptasparserow;
595     cnzi   = 0;
596     for (j=0;j<ptanzi;j++) {
597       prow = *ptaj++;
598       pnzj = pi[prow+1] - pi[prow];
599       pjj  = pj + pi[prow];
600       /* add non-zero cols of P into the sorted linked list lnk */
601       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
602       cnzi += nlnk;
603     }
604 
605     /* If free space is not available, make more free space */
606     /* Double the amount of total space in the list */
607     if (current_space->local_remaining<cnzi) {
608       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
609     }
610 
611     /* Copy data into free space, and zero out denserows */
612     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
613     current_space->array           += cnzi;
614     current_space->local_used      += cnzi;
615     current_space->local_remaining -= cnzi;
616 
617     for (j=0;j<ptanzi;j++) {
618       ptadenserow[ptasparserow[j]] = 0;
619     }
620     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
621     /*        For now, we will recompute what is needed. */
622     ci[i+1] = ci[i] + cnzi;
623   }
624   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
625   /* Allocate space for cj, initialize cj, and */
626   /* destroy list of free space and other temporary array(s) */
627   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
628   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
629   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
630   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
631 
632   /* Allocate space for ca */
633   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
634   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
635 
636   /* put together the new matrix */
637   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
638 
639   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
640   /* Since these are PETSc arrays, change flags to free them as necessary. */
641   c = (Mat_SeqAIJ *)((*C)->data);
642   c->freedata = PETSC_TRUE;
643   c->nonew    = 0;
644 
645   /* Clean up. */
646   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 
651 #include "src/mat/impls/maij/maij.h"
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
655 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
656 {
657   /* This routine requires testing -- I don't think it works. */
658   PetscErrorCode ierr;
659   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
660   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
661   Mat            P=pp->AIJ;
662   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
663   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
664   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
665   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
666   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
667   MatScalar      *ca;
668 
669   PetscFunctionBegin;
670   /* Start timer */
671   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
672 
673   /* Get ij structure of P^T */
674   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
675 
676   /* Allocate ci array, arrays for fill computation and */
677   /* free space for accumulating nonzero column info */
678   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
679   ci[0] = 0;
680 
681   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
682   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
683   ptasparserow = ptadenserow  + an;
684   denserow     = ptasparserow + an;
685   sparserow    = denserow     + pn;
686 
687   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
688   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
689   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
690   current_space = free_space;
691 
692   /* Determine symbolic info for each row of C: */
693   for (i=0;i<pn/ppdof;i++) {
694     ptnzi  = pti[i+1] - pti[i];
695     ptanzi = 0;
696     ptJ    = ptj + pti[i];
697     for (dof=0;dof<ppdof;dof++) {
698     /* Determine symbolic row of PtA: */
699       for (j=0;j<ptnzi;j++) {
700         arow = ptJ[j] + dof;
701         anzj = ai[arow+1] - ai[arow];
702         ajj  = aj + ai[arow];
703         for (k=0;k<anzj;k++) {
704           if (!ptadenserow[ajj[k]]) {
705             ptadenserow[ajj[k]]    = -1;
706             ptasparserow[ptanzi++] = ajj[k];
707           }
708         }
709       }
710       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
711       ptaj = ptasparserow;
712       cnzi   = 0;
713       for (j=0;j<ptanzi;j++) {
714         pdof = *ptaj%dof;
715         prow = (*ptaj++)/dof;
716         pnzj = pi[prow+1] - pi[prow];
717         pjj  = pj + pi[prow];
718         for (k=0;k<pnzj;k++) {
719           if (!denserow[pjj[k]+pdof]) {
720             denserow[pjj[k]+pdof] = -1;
721             sparserow[cnzi++]     = pjj[k]+pdof;
722           }
723         }
724       }
725 
726       /* sort sparserow */
727       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
728 
729       /* If free space is not available, make more free space */
730       /* Double the amount of total space in the list */
731       if (current_space->local_remaining<cnzi) {
732         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
733       }
734 
735       /* Copy data into free space, and zero out denserows */
736       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
737       current_space->array           += cnzi;
738       current_space->local_used      += cnzi;
739       current_space->local_remaining -= cnzi;
740 
741       for (j=0;j<ptanzi;j++) {
742         ptadenserow[ptasparserow[j]] = 0;
743       }
744       for (j=0;j<cnzi;j++) {
745         denserow[sparserow[j]] = 0;
746       }
747       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
748       /*        For now, we will recompute what is needed. */
749       ci[i+1+dof] = ci[i+dof] + cnzi;
750     }
751   }
752   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
753   /* Allocate space for cj, initialize cj, and */
754   /* destroy list of free space and other temporary array(s) */
755   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
756   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
757   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
758 
759   /* Allocate space for ca */
760   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
761   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
762 
763   /* put together the new matrix */
764   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
765 
766   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
767   /* Since these are PETSc arrays, change flags to free them as necessary. */
768   c = (Mat_SeqAIJ *)((*C)->data);
769   c->freedata = PETSC_TRUE;
770   c->nonew    = 0;
771 
772   /* Clean up. */
773   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
774 
775   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
776   PetscFunctionReturn(0);
777 }
778 EXTERN_C_END
779 
780 /*
781    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
782 
783    Collective on Mat
784 
785    Input Parameters:
786 +  A - the matrix
787 -  P - the projection matrix
788 
789    Output Parameters:
790 .  C - the product matrix
791 
792    Notes:
793    C must have been created by calling MatPtAPSymbolic and must be destroyed by
794    the user using MatDestroy().
795 
796    This routine is currently only implemented for pairs of AIJ matrices and classes
797    which inherit from AIJ.  C will be of type MATAIJ.
798 
799    Level: intermediate
800 
801 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
802 */
803 #undef __FUNCT__
804 #define __FUNCT__ "MatPtAPNumeric"
805 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
806 {
807   PetscErrorCode ierr;
808   PetscErrorCode (*fA)(Mat,Mat,Mat);
809   PetscErrorCode (*fP)(Mat,Mat,Mat);
810 
811   PetscFunctionBegin;
812 
813   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
814   PetscValidType(A,1);
815   MatPreallocated(A);
816   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
817   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
818 
819   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
820   PetscValidType(P,2);
821   MatPreallocated(P);
822   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
823   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
824 
825   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
826   PetscValidType(C,3);
827   MatPreallocated(C);
828   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
829   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
830 
831   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
832   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
833   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
834   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
835 
836   /* For now, we do not dispatch based on the type of A and P */
837   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
838   fA = A->ops->ptapnumeric;
839   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
840   fP = P->ops->ptapnumeric;
841   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
842   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
843 
844   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
845   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
846   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
847 
848   PetscFunctionReturn(0);
849 }
850 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric phase of C = P^T * A * P for SeqAIJ matrices.  The nonzero pattern
   of C (c->i, c->j) must already exist (built by the symbolic phase); this
   routine only fills in c->a.  For each row i of A it accumulates the sparse
   row (A*P)[i,:] into a dense work array, then scatters
   (P^T)[:,i] * (A*P)[i,:] into C as an outer product.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj and pJ are walked cursor-style across the whole computation; do not reset per row */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  /* aa and pA are likewise running cursors over a->a and p->a */
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  /* Single allocation holds: cn MatScalar values (apa), cn ints (apj: sparse
     column list), cn ints (apjdense: dense occupied-marker).
     NOTE(review): the (PetscInt*) cast assumes MatScalar alignment suffices
     for PetscInt - true for the usual double/int layout; confirm for
     exotic scalar types. */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      /* For each nonzero A(i,prow), add A(i,prow)*P[prow,:] into the accumulator */
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        /* First touch of this column: record it in the sparse list apj */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Both index lists are sorted, so one pass over cjj suffices; the loop
         terminates once all apnzj entries of apj have been matched */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    /* Only the touched entries are cleared, keeping the accumulator reset O(apnzj) */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
932 
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
/*
   Numeric phase of the local product.  A is MPIAIJ: its diagonal (a->A) and
   off-diagonal (a->B) SeqAIJ pieces are traversed separately.  P_loc holds
   this process's rows of P, P_oth the off-process rows it needs; C is the
   sequential local result whose pattern was built by the symbolic routine.
   NOTE(review): prstart is not referenced in this body - presumably kept for
   interface symmetry with MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local; confirm.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  /* adj, aoj and pJ are running cursors over the column arrays; not reset per row */
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  /* ada, aoa and pA are likewise running cursors over the value arrays */
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  /* Register the logging event once per process */
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  /* One allocation: cn MatScalar accumulator (apa), cn ints sparse column list
     (apj), cn ints dense occupied-marker (apjdense) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      /* Map the compressed off-diagonal column back to a global column; a
         column inside [cstart,cend) would belong to the diagonal block and
         indicates a corrupted column map.
         NOTE(review): both valid branches assign the same value to prow,
         whereas the matching symbolic routine treats them differently
         (ajj[k] vs ajj[k]+pm) - confirm this asymmetry is intended. */
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Both lists are sorted; loop ends when all apnzj entries are matched */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1050 static PetscEvent logkey_matptapsymbolic_local = 0;
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1053 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1054 {
1055   PetscErrorCode ierr;
1056   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1057   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1058   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1059   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1060   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1061   PetscInt       *ci,*cj,*ptaj;
1062   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1063   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1064   PetscInt       pm=P_loc->m,nlnk,*lnk;
1065   MatScalar      *ca;
1066   PetscBT        lnkbt;
1067   PetscInt       prend,nprow_loc,nprow_oth;
1068   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1069   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1070   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1071 
1072   PetscFunctionBegin;
1073   if (!logkey_matptapsymbolic_local) {
1074     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1075   }
1076   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1077 
1078   prend = prstart + pm;
1079 
1080   /* get ij structure of P_loc^T */
1081   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1082   ptJ=ptj;
1083 
1084   /* Allocate ci array, arrays for fill computation and */
1085   /* free space for accumulating nonzero column info */
1086   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1087   ci[0] = 0;
1088 
1089   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1090   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
1091   ptasparserow_loc = ptadenserow_loc + aN;
1092   ptadenserow_oth  = ptasparserow_loc + aN;
1093   ptasparserow_oth = ptadenserow_oth + aN;
1094 
1095   /* create and initialize a linked list */
1096   nlnk = pN+1;
1097   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1098 
1099   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1100   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1101   nnz           = adi[am] + aoi[am];
1102   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1103   current_space = free_space;
1104 
1105   /* determine symbolic info for each row of C: */
1106   for (i=0;i<pN;i++) {
1107     ptnzi  = pti[i+1] - pti[i];
1108     nprow_loc = 0; nprow_oth = 0;
1109     /* i-th row of symbolic P_loc^T*A_loc: */
1110     for (j=0;j<ptnzi;j++) {
1111       arow = *ptJ++;
1112       /* diagonal portion of A */
1113       anzj = adi[arow+1] - adi[arow];
1114       ajj  = adj + adi[arow];
1115       for (k=0;k<anzj;k++) {
1116         col = ajj[k]+prstart;
1117         if (!ptadenserow_loc[col]) {
1118           ptadenserow_loc[col]    = -1;
1119           ptasparserow_loc[nprow_loc++] = col;
1120         }
1121       }
1122       /* off-diagonal portion of A */
1123       anzj = aoi[arow+1] - aoi[arow];
1124       ajj  = aoj + aoi[arow];
1125       for (k=0;k<anzj;k++) {
1126         col = a->garray[ajj[k]];  /* global col */
1127         if (col < cstart){
1128           col = ajj[k];
1129         } else if (col >= cend){
1130           col = ajj[k] + pm;
1131         } else {
1132           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1133         }
1134         if (!ptadenserow_oth[col]) {
1135           ptadenserow_oth[col]    = -1;
1136           ptasparserow_oth[nprow_oth++] = col;
1137         }
1138       }
1139     }
1140 
1141     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1142     cnzi   = 0;
1143     /* rows of P_loc */
1144     ptaj = ptasparserow_loc;
1145     for (j=0; j<nprow_loc; j++) {
1146       prow = *ptaj++;
1147       prow -= prstart; /* rm */
1148       pnzj = pi_loc[prow+1] - pi_loc[prow];
1149       pjj  = pj_loc + pi_loc[prow];
1150       /* add non-zero cols of P into the sorted linked list lnk */
1151       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1152       cnzi += nlnk;
1153     }
1154     /* rows of P_oth */
1155     ptaj = ptasparserow_oth;
1156     for (j=0; j<nprow_oth; j++) {
1157       prow = *ptaj++;
1158       if (prow >= prend) prow -= pm; /* rm */
1159       pnzj = pi_oth[prow+1] - pi_oth[prow];
1160       pjj  = pj_oth + pi_oth[prow];
1161       /* add non-zero cols of P into the sorted linked list lnk */
1162       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1163       cnzi += nlnk;
1164     }
1165 
1166     /* If free space is not available, make more free space */
1167     /* Double the amount of total space in the list */
1168     if (current_space->local_remaining<cnzi) {
1169       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1170     }
1171 
1172     /* Copy data into free space, and zero out denserows */
1173     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1174     current_space->array           += cnzi;
1175     current_space->local_used      += cnzi;
1176     current_space->local_remaining -= cnzi;
1177 
1178     for (j=0;j<nprow_loc; j++) {
1179       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1180     }
1181     for (j=0;j<nprow_oth; j++) {
1182       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1183     }
1184 
1185     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1186     /*        For now, we will recompute what is needed. */
1187     ci[i+1] = ci[i] + cnzi;
1188   }
1189   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1190   /* Allocate space for cj, initialize cj, and */
1191   /* destroy list of free space and other temporary array(s) */
1192   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1193   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1194   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1195   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1196 
1197   /* Allocate space for ca */
1198   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1199   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1200 
1201   /* put together the new matrix */
1202   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1203 
1204   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1205   /* Since these are PETSc arrays, change flags to free them as necessary. */
1206   c = (Mat_SeqAIJ *)((*C)->data);
1207   c->freedata = PETSC_TRUE;
1208   c->nonew    = 0;
1209 
1210   /* Clean up. */
1211   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1212 
1213   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216