xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 82afef89a5de20cc299e0d36137d907034be2a90)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94 
95     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
96     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97   }
98   ierr = PetscFree(ptap);CHKERRQ(ierr);
99 
100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
106 PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
107 {
108   PetscErrorCode       ierr;
109   Mat_MatMatMultMPI    *ptap;
110   PetscObjectContainer container;
111 
112   PetscFunctionBegin;
113   if (scall == MAT_INITIAL_MATRIX){
114     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
115     ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);
116 
117     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
118     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
119 
120     /* get P_loc by taking all local rows of P */
121     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
122 
123     /* attach the supporting struct to P for reuse */
124     P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
125     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
126     ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
127     ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);
128 
129     /* now, compute symbolic local P^T*A*P */
130     ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
131     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
132   } else if (scall == MAT_REUSE_MATRIX){
133     ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
134     if (container) {
135       ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
136     } else {
137       SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
138     }
139 
140     /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
141     ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);
142 
143     /* get P_loc by taking all local rows of P */
144     ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);
145 
146   } else {
147     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
148   }
149   /* now, compute numeric local P^T*A*P */
150   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
151   ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
152   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
153 
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160 {
161   PetscErrorCode       ierr;
162   Mat                  C_seq;
163   Mat_MatMatMultMPI    *ptap;
164   PetscObjectContainer container;
165 
166   PetscFunctionBegin;
167 
168   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169   if (container) {
170     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171   } else {
172     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173   }
174   /* compute C_seq = P_loc^T * A_loc * P_seq */
175   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176 
177   /* add C_seq into mpi C */
178   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179 
180   PetscFunctionReturn(0);
181 }
182 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
/*
   MatPtAPNumeric_MPIAIJ_MPIAIJ - Fills the numeric values of the
   distributed product C = P^T * A * P.

   Requires the "MatMergeSeqsToMPI" container on C (from the symbolic
   merge) and the "MatPtAPMPI" container on P (from MatPtAP_MPIAIJ_MPIAIJ).
   Per process it forms one row of A*P at a time (diagonal + off-diagonal
   parts of A against P_loc/P_oth), performs the outer product with
   P_loc^T, accumulates locally-owned rows directly into C via
   MatSetValues and remotely-owned rows into the sequential matrix C_seq,
   then exchanges the C_seq values with MPI and adds the received
   contributions into C.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat            C_seq;
  Mat_SeqAIJ     *p_loc,*p_oth;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt       i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt       *cjj;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar      *pa_loc,*pA,*pa_oth;
  PetscInt       am=A->m,cN=C->N,cm=C->m;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  Mat_SeqAIJ           *cseq;
  PetscInt             *cseqi,*cseqj;
  MatScalar            *ca;

  PetscFunctionBegin;
  /* retrieve the merge info attached to C by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  /* retrieve the cached P_loc/P_oth attached to P */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }
#ifdef OLD
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
#endif /* OLD */
   /*--------------------------------------------------------------*/

  /* unpack the CSR arrays of P_loc and P_oth; pJ/pA walk P_loc row by row */
  p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  /* single allocation split three ways: apa (values), apj (col indices), apjdense (occupancy flags) */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_Seq and C */
  /* NOTE(review): cseqi is indexed by cN here, i.e. C_seq is assumed to have
     cN rows (full global column space) -- verify against the symbolic phase */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    nzi  = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A; its columns map to rows of P_oth */
    nzi  = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      if (crow >= owners[rank] && crow < owners[rank+1]){ /* add value into mpi C */
        for (k=0; k<apnzj; k++) ca[k] = (*pA)*apa[apj[k]];
        ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
        ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      } else { /* add value into C_seq to be sent to other processors */
        nextap = 0;
        for (k=0;nextap<apnzj;k++) {
          if (cjj[k]==apj[nextap]) {
            caj[k] += (*pA)*apa[apj[nextap++]];
          }
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

#ifdef OLD1
  ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
#endif /* OLD1 */
  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  /* one send per destination process that owns rows we accumulated in C_seq */
  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  /* status array is sized for the larger of nrecv/nsend (both <= size) */
  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  /* single allocation split three ways: buf_ri_k, nextrow, nextcseqi */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        /* merge received row into ba_i; bj_i is a superset of cseqj */
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      /* if (rank==1) printf(" [%d] set %d values on C, row: %d\n",rank,bnzi,crow); */
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
437 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
438 {
439   PetscErrorCode ierr;
440 
441   PetscFunctionBegin;
442   if (scall == MAT_INITIAL_MATRIX){
443     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
444     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
445     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
446   }
447   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
448   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
449   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 /*
454    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
455 
456    Collective on Mat
457 
458    Input Parameters:
459 +  A - the matrix
460 -  P - the projection matrix
461 
462    Output Parameters:
463 .  C - the (i,j) structure of the product matrix
464 
465    Notes:
466    C will be created and must be destroyed by the user with MatDestroy().
467 
468    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
469    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
470    this (i,j) structure by calling MatPtAPNumeric().
471 
472    Level: intermediate
473 
474 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
475 */
#undef __FUNCT__
#define __FUNCT__ "MatPtAPSymbolic"
/* Public entry for the symbolic phase; see the man page comment above.
   Validates A and P, checks dimension compatibility, then dispatches to
   the type-specific implementation (A and P must share one). */
PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
  PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);

  PetscFunctionBegin;

  /* A must be an assembled, unfactored matrix */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* P must be an assembled, unfactored matrix */
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  PetscValidPointer(C,3);

  /* rows of P must match cols of A, and A must be square */
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapsymbolic;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
  fP = P->ops->ptapsymbolic;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
517 
/* Support data hung off a SeqAIJ matrix (via spptr) by the PtAP routines. */
typedef struct {
  Mat    symAP;   /* presumably the saved symbolic product A*P for reuse -- TODO confirm; destroyed in MatDestroy_SeqAIJ_PtAP() */
} Mat_PtAPstruct;
521 
522 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
526 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
527 {
528   PetscErrorCode    ierr;
529   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
530 
531   PetscFunctionBegin;
532   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
533   ierr = PetscFree(ptap);CHKERRQ(ierr);
534   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
535   PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
540 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
541 {
542   PetscErrorCode ierr;
543   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
544   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
545   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
546   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
547   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
548   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
549   MatScalar      *ca;
550   PetscBT        lnkbt;
551 
552   PetscFunctionBegin;
553   /* Get ij structure of P^T */
554   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
555   ptJ=ptj;
556 
557   /* Allocate ci array, arrays for fill computation and */
558   /* free space for accumulating nonzero column info */
559   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
560   ci[0] = 0;
561 
562   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
563   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
564   ptasparserow = ptadenserow  + an;
565 
566   /* create and initialize a linked list */
567   nlnk = pn+1;
568   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
569 
570   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
571   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
572   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
573   current_space = free_space;
574 
575   /* Determine symbolic info for each row of C: */
576   for (i=0;i<pn;i++) {
577     ptnzi  = pti[i+1] - pti[i];
578     ptanzi = 0;
579     /* Determine symbolic row of PtA: */
580     for (j=0;j<ptnzi;j++) {
581       arow = *ptJ++;
582       anzj = ai[arow+1] - ai[arow];
583       ajj  = aj + ai[arow];
584       for (k=0;k<anzj;k++) {
585         if (!ptadenserow[ajj[k]]) {
586           ptadenserow[ajj[k]]    = -1;
587           ptasparserow[ptanzi++] = ajj[k];
588         }
589       }
590     }
591       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
592     ptaj = ptasparserow;
593     cnzi   = 0;
594     for (j=0;j<ptanzi;j++) {
595       prow = *ptaj++;
596       pnzj = pi[prow+1] - pi[prow];
597       pjj  = pj + pi[prow];
598       /* add non-zero cols of P into the sorted linked list lnk */
599       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
600       cnzi += nlnk;
601     }
602 
603     /* If free space is not available, make more free space */
604     /* Double the amount of total space in the list */
605     if (current_space->local_remaining<cnzi) {
606       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
607     }
608 
609     /* Copy data into free space, and zero out denserows */
610     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
611     current_space->array           += cnzi;
612     current_space->local_used      += cnzi;
613     current_space->local_remaining -= cnzi;
614 
615     for (j=0;j<ptanzi;j++) {
616       ptadenserow[ptasparserow[j]] = 0;
617     }
618     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
619     /*        For now, we will recompute what is needed. */
620     ci[i+1] = ci[i] + cnzi;
621   }
622   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
623   /* Allocate space for cj, initialize cj, and */
624   /* destroy list of free space and other temporary array(s) */
625   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
626   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
627   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
628   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
629 
630   /* Allocate space for ca */
631   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
632   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
633 
634   /* put together the new matrix */
635   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
636 
637   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
638   /* Since these are PETSc arrays, change flags to free them as necessary. */
639   c = (Mat_SeqAIJ *)((*C)->data);
640   c->freedata = PETSC_TRUE;
641   c->nonew    = 0;
642 
643   /* Clean up. */
644   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
645 
646   PetscFunctionReturn(0);
647 }
648 
649 #include "src/mat/impls/maij/maij.h"
650 EXTERN_C_BEGIN
651 #undef __FUNCT__
652 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
653 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
654 {
655   /* This routine requires testing -- I don't think it works. */
656   PetscErrorCode ierr;
657   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
658   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
659   Mat            P=pp->AIJ;
660   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
661   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
662   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
663   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
664   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
665   MatScalar      *ca;
666 
667   PetscFunctionBegin;
668   /* Start timer */
669   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
670 
671   /* Get ij structure of P^T */
672   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
673 
674   /* Allocate ci array, arrays for fill computation and */
675   /* free space for accumulating nonzero column info */
676   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
677   ci[0] = 0;
678 
679   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
680   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
681   ptasparserow = ptadenserow  + an;
682   denserow     = ptasparserow + an;
683   sparserow    = denserow     + pn;
684 
685   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
686   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
687   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
688   current_space = free_space;
689 
690   /* Determine symbolic info for each row of C: */
691   for (i=0;i<pn/ppdof;i++) {
692     ptnzi  = pti[i+1] - pti[i];
693     ptanzi = 0;
694     ptJ    = ptj + pti[i];
695     for (dof=0;dof<ppdof;dof++) {
696     /* Determine symbolic row of PtA: */
697       for (j=0;j<ptnzi;j++) {
698         arow = ptJ[j] + dof;
699         anzj = ai[arow+1] - ai[arow];
700         ajj  = aj + ai[arow];
701         for (k=0;k<anzj;k++) {
702           if (!ptadenserow[ajj[k]]) {
703             ptadenserow[ajj[k]]    = -1;
704             ptasparserow[ptanzi++] = ajj[k];
705           }
706         }
707       }
708       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
709       ptaj = ptasparserow;
710       cnzi   = 0;
711       for (j=0;j<ptanzi;j++) {
712         pdof = *ptaj%dof;
713         prow = (*ptaj++)/dof;
714         pnzj = pi[prow+1] - pi[prow];
715         pjj  = pj + pi[prow];
716         for (k=0;k<pnzj;k++) {
717           if (!denserow[pjj[k]+pdof]) {
718             denserow[pjj[k]+pdof] = -1;
719             sparserow[cnzi++]     = pjj[k]+pdof;
720           }
721         }
722       }
723 
724       /* sort sparserow */
725       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
726 
727       /* If free space is not available, make more free space */
728       /* Double the amount of total space in the list */
729       if (current_space->local_remaining<cnzi) {
730         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
731       }
732 
733       /* Copy data into free space, and zero out denserows */
734       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
735       current_space->array           += cnzi;
736       current_space->local_used      += cnzi;
737       current_space->local_remaining -= cnzi;
738 
739       for (j=0;j<ptanzi;j++) {
740         ptadenserow[ptasparserow[j]] = 0;
741       }
742       for (j=0;j<cnzi;j++) {
743         denserow[sparserow[j]] = 0;
744       }
745       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
746       /*        For now, we will recompute what is needed. */
747       ci[i+1+dof] = ci[i+dof] + cnzi;
748     }
749   }
750   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
751   /* Allocate space for cj, initialize cj, and */
752   /* destroy list of free space and other temporary array(s) */
753   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
754   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
755   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
756 
757   /* Allocate space for ca */
758   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
759   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
760 
761   /* put together the new matrix */
762   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
763 
764   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
765   /* Since these are PETSc arrays, change flags to free them as necessary. */
766   c = (Mat_SeqAIJ *)((*C)->data);
767   c->freedata = PETSC_TRUE;
768   c->nonew    = 0;
769 
770   /* Clean up. */
771   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
772 
773   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 EXTERN_C_END
777 
778 /*
779    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
780 
781    Collective on Mat
782 
783    Input Parameters:
784 +  A - the matrix
785 -  P - the projection matrix
786 
787    Output Parameters:
788 .  C - the product matrix
789 
790    Notes:
791    C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().
793 
794    This routine is currently only implemented for pairs of AIJ matrices and classes
795    which inherit from AIJ.  C will be of type MATAIJ.
796 
797    Level: intermediate
798 
799 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
800 */
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscErrorCode (*fA)(Mat,Mat,Mat);   /* ptapnumeric implementation registered on A */
  PetscErrorCode (*fP)(Mat,Mat,Mat);   /* ptapnumeric implementation registered on P */

  PetscFunctionBegin;

  /* A must be a valid, preallocated, assembled, unfactored matrix. */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* Same requirements on the projection matrix P. */
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* C must already exist (created by MatPtAPSymbolic()) and be assembled. */
  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* Dimension compatibility for C = P^T * A * P:
     A square, rows(P) == cols(A), and C is cols(P) x cols(P). */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  /* Both A and P must register the SAME ptapnumeric routine; a mismatch means the
     pair of types has no common implementation. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  /* Dispatch to the type-specific implementation, bracketed by event logging. */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
848 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Fills in the numerical values of C = P^T * A * P for SeqAIJ matrices.
   The sparsity pattern of C must already have been computed (see the sparse
   axpy below, which assumes every column produced for a row is already
   present in C's structure for that row).
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;                            /* flop count reported to the PETSc log */
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aa/aj and pA/pJ are walked sequentially entry-by-entry over all rows */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  /* Layout of the single allocation: apa = cn scalars (dense row values),
     apj = cn ints (sparse column list), apjdense = cn ints (occupancy flags). */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      /* Each nonzero A(i,prow) scales row prow of P into the accumulator. */
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          /* First hit on this column: record it in the sparse list. */
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    /* Each nonzero P(i,crow) contributes P(i,crow)*(A*P)[i,:] to row crow of C. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Both cjj and apj are sorted, so a single merge-style scan suffices;
         correctness relies on the symbolic phase having placed every apj
         column into row crow of C. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    /* (only the entries actually touched, so the accumulator stays clean) */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
930 
931 /* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
932 static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
/*
   Fills in the numerical values of the local product C = P_loc^T * A * P,
   where A's contribution is split into its diagonal (a->A) and off-diagonal
   (a->B) SeqAIJ blocks and the needed off-process rows of P are supplied in
   P_oth.  C's sparsity pattern must already exist (computed by the matching
   symbolic routine).
   NOTE(review): prstart is not referenced in this routine; it appears to be
   kept for interface symmetry with the symbolic version — confirm before
   removing.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;                 /* flop count reported to the PETSc log */
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  /* adj/ada and aoj/aoa are walked sequentially entry-by-entry over all rows */
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  /* Register the logging event on first use. */
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P */
  /* Layout: apa = cn scalars (dense row values), apj = cn ints (sparse column
     list), apjdense = cn ints (occupancy flags). */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
    apnzj = 0;
    /* diagonal portion of A: scales rows of P_loc */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A: scales rows of P_oth */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      /* Both admissible branches use the compact local index *aoj as the row
         of P_oth; the split on the global column only validates that the
         column truly lies outside this process's diagonal block. */
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Merge-style scan over two sorted index lists; relies on the symbolic
         phase having placed every apj column into row crow of C. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P (only the entries touched) */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1048 static PetscEvent logkey_matptapsymbolic_local = 0;
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1051 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1052 {
1053   PetscErrorCode ierr;
1054   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1055   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1056   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1057   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1058   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1059   PetscInt       *ci,*cj,*ptaj;
1060   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1061   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1062   PetscInt       pm=P_loc->m,nlnk,*lnk;
1063   MatScalar      *ca;
1064   PetscBT        lnkbt;
1065   PetscInt       prend,nprow_loc,nprow_oth;
1066   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1067   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1068   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1069 
1070   PetscFunctionBegin;
1071   if (!logkey_matptapsymbolic_local) {
1072     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1073   }
1074   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1075 
1076   prend = prstart + pm;
1077 
1078   /* get ij structure of P_loc^T */
1079   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1080   ptJ=ptj;
1081 
1082   /* Allocate ci array, arrays for fill computation and */
1083   /* free space for accumulating nonzero column info */
1084   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1085   ci[0] = 0;
1086 
1087   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1088   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
1089   ptasparserow_loc = ptadenserow_loc + aN;
1090   ptadenserow_oth  = ptasparserow_loc + aN;
1091   ptasparserow_oth = ptadenserow_oth + aN;
1092 
1093   /* create and initialize a linked list */
1094   nlnk = pN+1;
1095   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1096 
1097   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1098   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1099   nnz           = adi[am] + aoi[am];
1100   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1101   current_space = free_space;
1102 
1103   /* determine symbolic info for each row of C: */
1104   for (i=0;i<pN;i++) {
1105     ptnzi  = pti[i+1] - pti[i];
1106     nprow_loc = 0; nprow_oth = 0;
1107     /* i-th row of symbolic P_loc^T*A_loc: */
1108     for (j=0;j<ptnzi;j++) {
1109       arow = *ptJ++;
1110       /* diagonal portion of A */
1111       anzj = adi[arow+1] - adi[arow];
1112       ajj  = adj + adi[arow];
1113       for (k=0;k<anzj;k++) {
1114         col = ajj[k]+prstart;
1115         if (!ptadenserow_loc[col]) {
1116           ptadenserow_loc[col]    = -1;
1117           ptasparserow_loc[nprow_loc++] = col;
1118         }
1119       }
1120       /* off-diagonal portion of A */
1121       anzj = aoi[arow+1] - aoi[arow];
1122       ajj  = aoj + aoi[arow];
1123       for (k=0;k<anzj;k++) {
1124         col = a->garray[ajj[k]];  /* global col */
1125         if (col < cstart){
1126           col = ajj[k];
1127         } else if (col >= cend){
1128           col = ajj[k] + pm;
1129         } else {
1130           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1131         }
1132         if (!ptadenserow_oth[col]) {
1133           ptadenserow_oth[col]    = -1;
1134           ptasparserow_oth[nprow_oth++] = col;
1135         }
1136       }
1137     }
1138 
1139     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1140     cnzi   = 0;
1141     /* rows of P_loc */
1142     ptaj = ptasparserow_loc;
1143     for (j=0; j<nprow_loc; j++) {
1144       prow = *ptaj++;
1145       prow -= prstart; /* rm */
1146       pnzj = pi_loc[prow+1] - pi_loc[prow];
1147       pjj  = pj_loc + pi_loc[prow];
1148       /* add non-zero cols of P into the sorted linked list lnk */
1149       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1150       cnzi += nlnk;
1151     }
1152     /* rows of P_oth */
1153     ptaj = ptasparserow_oth;
1154     for (j=0; j<nprow_oth; j++) {
1155       prow = *ptaj++;
1156       if (prow >= prend) prow -= pm; /* rm */
1157       pnzj = pi_oth[prow+1] - pi_oth[prow];
1158       pjj  = pj_oth + pi_oth[prow];
1159       /* add non-zero cols of P into the sorted linked list lnk */
1160       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1161       cnzi += nlnk;
1162     }
1163 
1164     /* If free space is not available, make more free space */
1165     /* Double the amount of total space in the list */
1166     if (current_space->local_remaining<cnzi) {
1167       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1168     }
1169 
1170     /* Copy data into free space, and zero out denserows */
1171     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1172     current_space->array           += cnzi;
1173     current_space->local_used      += cnzi;
1174     current_space->local_remaining -= cnzi;
1175 
1176     for (j=0;j<nprow_loc; j++) {
1177       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1178     }
1179     for (j=0;j<nprow_oth; j++) {
1180       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1181     }
1182 
1183     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1184     /*        For now, we will recompute what is needed. */
1185     ci[i+1] = ci[i] + cnzi;
1186   }
1187   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1188   /* Allocate space for cj, initialize cj, and */
1189   /* destroy list of free space and other temporary array(s) */
1190   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1191   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1192   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1193   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1194 
1195   /* Allocate space for ca */
1196   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1197   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1198 
1199   /* put together the new matrix */
1200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1201 
1202   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1203   /* Since these are PETSc arrays, change flags to free them as necessary. */
1204   c = (Mat_SeqAIJ *)((*C)->data);
1205   c->freedata = PETSC_TRUE;
1206   c->nonew    = 0;
1207 
1208   /* Clean up. */
1209   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1210 
1211   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214