xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision d7d82daadd7e0d6eee5c84c3bd60dd428ccf339b)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94 
95     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
96     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97   }
98   ierr = PetscFree(ptap);CHKERRQ(ierr);
99 
100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP_MPIAIJ_MPIAIJ - parallel driver for C = P^T * A * P.

   MAT_INITIAL_MATRIX: gathers the off-process rows of P needed by the local
   part of A, caches the gathered data on P inside a PetscObjectContainer
   named "MatPtAPMPI" so a later MAT_REUSE_MATRIX call can skip the setup,
   and runs the symbolic phase.  MAT_REUSE_MATRIX: refreshes the gathered
   rows of P using the cached container.  Both paths finish by computing the
   numeric product.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;      /* support data cached on P between calls */
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    /* NOTE(review): this silently assumes P's previous destroy routine was
       MatDestroy_MPIAIJ (MatDestroy_MPIAIJ_PtAP calls it directly); the
       original function pointer is not saved -- confirm */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: the container must have been attached by a previous
       MAT_INITIAL_MATRIX call on this same P */
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
    }
    /* update P_oth */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %D",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
155 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
156 {
157   PetscErrorCode       ierr;
158   Mat                  C_seq;
159   Mat_MatMatMultMPI    *ptap;
160   PetscObjectContainer container;
161 
162   PetscFunctionBegin;
163 
164   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
165   if (container) {
166     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
167   } else {
168     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
169   }
170   /* compute C_seq = P_loc^T * A_loc * P_seq */
171   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
172 
173   /* add C_seq into mpi C */
174   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
175 
176   PetscFunctionReturn(0);
177 }
178 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
/*
   MatPtAPNumeric_MPIAIJ_MPIAIJ - numeric phase of the parallel C = P^T*A*P.

   Requires two containers created earlier:
     - "MatMergeSeqsToMPI" on C (from the symbolic/merge phase), and
     - "MatPtAPMPI" on P (from MatPtAP_MPIAIJ_MPIAIJ).
   The product is formed in two stages:
     1. each process walks its local rows of A, forms one sparse row of A*P
        at a time, and applies the outer product with P^T: contributions to
        rows of C owned locally go straight into C via
        MatSetValues(ADD_VALUES); contributions to rows owned elsewhere are
        accumulated into the sequential matrix C_seq;
     2. the off-owner values of C_seq are shipped with MPI_Isend/Irecv and
        the received values are added into C.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  Mat_MatMatMultMPI    *ptap;
  PetscObjectContainer cont_merge,cont_ptap;
  PetscInt             flops=0;
  Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data;
  Mat_SeqAIJ           *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,
                       *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,
                       *cd=(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data;
  Mat                  C_seq;
  Mat_SeqAIJ           *cseq,*p_oth;
  PetscInt             *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend;
  PetscInt             *pi_oth,*pj_oth,*pJ_d=pd->j,*pJ_o=po->j,*pjj;
  PetscInt             i,j,k,nzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt             *cjj;
  MatScalar            *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar            *pA_d=pd->a,*pA_o=po->a,*pa_oth;
  PetscInt             am=A->m,cN=C->N,cm=C->m;
  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i,*ca;
  PetscInt             *cseqi,*cseqj,col;


  PetscFunctionBegin;
  /* fetch the merge info attached to C by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  /* fetch the gathered pieces of P attached by MatPtAP_MPIAIJ_MPIAIJ */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }

  /* CSR arrays of the gathered off-process rows of P */
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  /* sequential accumulation matrix whose structure was fixed symbolically */
  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P:
     values (apa), column indices (apj) and a dense membership marker
     (apjdense), all carved out of one allocation of length cN each */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Allocate temporary MatScalar array for storage of one row of C */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)),&ca);CHKERRQ(ierr);
  ierr = PetscMemzero(ca,cN*(sizeof(MatScalar)));CHKERRQ(ierr);

  /* Clear old values in C_Seq and C */
  /* NOTE(review): cseqi is indexed with cN (global column count) here --
     verify this equals the row count of C_seq so cseqi[cN] is really
     nnz(C_seq) */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(cd->a,cd->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(co->a,co->i[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    nzi  = adi[i+1] - adi[i];
    for (j=0;j<nzi;j++) {
      prow = *adj++;
      /* diagonal portion of P */
      pnzj = pd->i[prow+1] - pd->i[prow];
      pjj  = pd->j + pd->i[prow]; /* local col index of P */
      paj  = pd->a + pd->i[prow];
      for (k=0;k<pnzj;k++) {
        col = *pjj + p->cstart; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1; /* mark column as present in this row of A*P */
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      /* off-diagonal portion of P */
      pnzj = po->i[prow+1] - po->i[prow];
      pjj  = po->j + po->i[prow]; /* local col index of P */
      paj  = po->a + po->i[prow];
      for (k=0;k<pnzj;k++) {
        col = p->garray[*pjj]; pjj++; /* global col index of P */
        if (!apjdense[col]) {
          apjdense[col] = -1;
          apj[apnzj++]  = col;
        }
        apa[col] += (*ada)*paj[k];
      }
      flops += 2*pnzj;

      ada++;
    }
    /* off-diagonal portion of A; the needed rows of P live in the
       gathered matrix P_oth (already in global column indices) */
    nzi  = aoi[i+1] - aoi[i];
    for (j=0;j<nzi;j++) {
      prow = *aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    /* diagonal portion of P */
    pnzi = pd->i[i+1] - pd->i[i];
    for (j=0;j<pnzi;j++) {
      /* locally-owned row of C: add directly with MatSetValues */
      crow = (*pJ_d++) + owners[rank];
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* add value into C */
      for (k=0; k<apnzj; k++) ca[k] = (*pA_d)*apa[apj[k]];
      ierr = MatSetValues(C,1,&crow,apnzj,apj,ca,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ca,apnzj*(sizeof(MatScalar)));CHKERRQ(ierr);
      pA_d++;
      flops += 2*apnzj;
    }

    /* off-diagonal portion of P */
    pnzi = po->i[i+1] - po->i[i];
    for (j=0;j<pnzi;j++) {
      /* row of C owned elsewhere: accumulate into C_seq for later sending */
      crow   = p->garray[*pJ_o++];
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* add value into C_seq to be sent to other processors */
      /* sparse axpy: relies on both cjj and apj being sorted and on apj
         being a subsequence of the symbolically-computed row cjj */
      nextap = 0;
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA_o)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA_o++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  /* NOTE(review): flops accumulated after this point (the merge below) are
     never logged -- confirm whether a second PetscLogFlops is intended */
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  /*--------------------------------------------------------------*/
  /* send and recv matrix values */
  /*-----------------------------*/
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* each destination gets the contiguous slice of C_seq values starting
       at its first owned row */
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }
  /* status array sized by comm size, which bounds both nrecv and nsend */
  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMemzero(ba_i,cN*sizeof(MatScalar));CHKERRQ(ierr);
  /* per-message cursors into the received i-structures */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
  }

  /* add received local vals to C */
  for (i=0; i<cm; i++) {
    crow = owners[rank] + i;  /* global row index of the i-th local row */
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    nzi = 0;
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        /* merge the received sparse row into the dense buffer ba_i,
           matching columns against the symbolic row bj_i */
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
            nzi++;
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    if (nzi>0){
      ierr = MatSetValues(C,1,&crow,bnzi,bj_i,ba_i,ADD_VALUES);CHKERRQ(ierr);
      ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
      flops += 2*nzi;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscFree(ca);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
435 
#undef __FUNCT__
#define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
/*
   MatPtAP_SeqAIJ_SeqAIJ - sequential driver for C = P^T * A * P.

   MAT_INITIAL_MATRIX: builds the nonzero structure of C via the symbolic
   phase first.  Any other scall value (i.e. MAT_REUSE_MATRIX) assumes *C
   was produced by an earlier MAT_INITIAL_MATRIX call with the same A and P
   structure.  The numeric phase runs in both cases.
*/
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  /* fill the numerical values of C (structure fixed by the symbolic phase) */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
453 
454 /*
455    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
456 
457    Collective on Mat
458 
459    Input Parameters:
460 +  A - the matrix
461 -  P - the projection matrix
462 
463    Output Parameters:
464 .  C - the (i,j) structure of the product matrix
465 
466    Notes:
467    C will be created and must be destroyed by the user with MatDestroy().
468 
469    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
470    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
471    this (i,j) structure by calling MatPtAPNumeric().
472 
473    Level: intermediate
474 
475 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
476 */
477 #undef __FUNCT__
478 #define __FUNCT__ "MatPtAPSymbolic"
479 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
480 {
481   PetscErrorCode ierr;
482   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
483   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
484 
485   PetscFunctionBegin;
486 
487   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
488   PetscValidType(A,1);
489   MatPreallocated(A);
490   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
491   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
492 
493   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
494   PetscValidType(P,2);
495   MatPreallocated(P);
496   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
497   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
498 
499   PetscValidPointer(C,3);
500 
501   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
502   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
503 
504   /* For now, we do not dispatch based on the type of A and P */
505   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
506   fA = A->ops->ptapsymbolic;
507   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
508   fP = P->ops->ptapsymbolic;
509   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
510   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
511 
512   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
513   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
514   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
515 
516   PetscFunctionReturn(0);
517 }
518 
/* Support data attached to a SeqAIJ product matrix via spptr;
   freed by MatDestroy_SeqAIJ_PtAP. */
typedef struct {
  Mat    symAP;  /* presumably the saved symbolic A*P for reuse in the numeric phase -- TODO confirm with callers */
} Mat_PtAPstruct;
522 
523 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
527 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
528 {
529   PetscErrorCode    ierr;
530   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
531 
532   PetscFunctionBegin;
533   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
534   ierr = PetscFree(ptap);CHKERRQ(ierr);
535   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
541 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
542 {
543   PetscErrorCode ierr;
544   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
545   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
546   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
547   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
548   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
549   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
550   MatScalar      *ca;
551   PetscBT        lnkbt;
552 
553   PetscFunctionBegin;
554   /* Get ij structure of P^T */
555   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
556   ptJ=ptj;
557 
558   /* Allocate ci array, arrays for fill computation and */
559   /* free space for accumulating nonzero column info */
560   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
561   ci[0] = 0;
562 
563   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
564   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
565   ptasparserow = ptadenserow  + an;
566 
567   /* create and initialize a linked list */
568   nlnk = pn+1;
569   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
570 
571   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
572   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
573   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
574   current_space = free_space;
575 
576   /* Determine symbolic info for each row of C: */
577   for (i=0;i<pn;i++) {
578     ptnzi  = pti[i+1] - pti[i];
579     ptanzi = 0;
580     /* Determine symbolic row of PtA: */
581     for (j=0;j<ptnzi;j++) {
582       arow = *ptJ++;
583       anzj = ai[arow+1] - ai[arow];
584       ajj  = aj + ai[arow];
585       for (k=0;k<anzj;k++) {
586         if (!ptadenserow[ajj[k]]) {
587           ptadenserow[ajj[k]]    = -1;
588           ptasparserow[ptanzi++] = ajj[k];
589         }
590       }
591     }
592       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
593     ptaj = ptasparserow;
594     cnzi   = 0;
595     for (j=0;j<ptanzi;j++) {
596       prow = *ptaj++;
597       pnzj = pi[prow+1] - pi[prow];
598       pjj  = pj + pi[prow];
599       /* add non-zero cols of P into the sorted linked list lnk */
600       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
601       cnzi += nlnk;
602     }
603 
604     /* If free space is not available, make more free space */
605     /* Double the amount of total space in the list */
606     if (current_space->local_remaining<cnzi) {
607       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
608     }
609 
610     /* Copy data into free space, and zero out denserows */
611     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
612     current_space->array           += cnzi;
613     current_space->local_used      += cnzi;
614     current_space->local_remaining -= cnzi;
615 
616     for (j=0;j<ptanzi;j++) {
617       ptadenserow[ptasparserow[j]] = 0;
618     }
619     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
620     /*        For now, we will recompute what is needed. */
621     ci[i+1] = ci[i] + cnzi;
622   }
623   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
624   /* Allocate space for cj, initialize cj, and */
625   /* destroy list of free space and other temporary array(s) */
626   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
627   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
628   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
629   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
630 
631   /* Allocate space for ca */
632   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
633   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
634 
635   /* put together the new matrix */
636   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
637 
638   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
639   /* Since these are PETSc arrays, change flags to free them as necessary. */
640   c = (Mat_SeqAIJ *)((*C)->data);
641   c->freedata = PETSC_TRUE;
642   c->nonew    = 0;
643 
644   /* Clean up. */
645   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 
650 #include "src/mat/impls/maij/maij.h"
651 EXTERN_C_BEGIN
652 #undef __FUNCT__
653 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
654 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
655 {
656   /* This routine requires testing -- I don't think it works. */
657   PetscErrorCode ierr;
658   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
659   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
660   Mat            P=pp->AIJ;
661   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
662   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
663   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
664   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
665   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
666   MatScalar      *ca;
667 
668   PetscFunctionBegin;
669   /* Start timer */
670   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
671 
672   /* Get ij structure of P^T */
673   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
674 
675   /* Allocate ci array, arrays for fill computation and */
676   /* free space for accumulating nonzero column info */
677   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
678   ci[0] = 0;
679 
680   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
681   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
682   ptasparserow = ptadenserow  + an;
683   denserow     = ptasparserow + an;
684   sparserow    = denserow     + pn;
685 
686   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
687   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
688   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
689   current_space = free_space;
690 
691   /* Determine symbolic info for each row of C: */
692   for (i=0;i<pn/ppdof;i++) {
693     ptnzi  = pti[i+1] - pti[i];
694     ptanzi = 0;
695     ptJ    = ptj + pti[i];
696     for (dof=0;dof<ppdof;dof++) {
697     /* Determine symbolic row of PtA: */
698       for (j=0;j<ptnzi;j++) {
699         arow = ptJ[j] + dof;
700         anzj = ai[arow+1] - ai[arow];
701         ajj  = aj + ai[arow];
702         for (k=0;k<anzj;k++) {
703           if (!ptadenserow[ajj[k]]) {
704             ptadenserow[ajj[k]]    = -1;
705             ptasparserow[ptanzi++] = ajj[k];
706           }
707         }
708       }
709       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
710       ptaj = ptasparserow;
711       cnzi   = 0;
712       for (j=0;j<ptanzi;j++) {
713         pdof = *ptaj%dof;
714         prow = (*ptaj++)/dof;
715         pnzj = pi[prow+1] - pi[prow];
716         pjj  = pj + pi[prow];
717         for (k=0;k<pnzj;k++) {
718           if (!denserow[pjj[k]+pdof]) {
719             denserow[pjj[k]+pdof] = -1;
720             sparserow[cnzi++]     = pjj[k]+pdof;
721           }
722         }
723       }
724 
725       /* sort sparserow */
726       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
727 
728       /* If free space is not available, make more free space */
729       /* Double the amount of total space in the list */
730       if (current_space->local_remaining<cnzi) {
731         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
732       }
733 
734       /* Copy data into free space, and zero out denserows */
735       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
736       current_space->array           += cnzi;
737       current_space->local_used      += cnzi;
738       current_space->local_remaining -= cnzi;
739 
740       for (j=0;j<ptanzi;j++) {
741         ptadenserow[ptasparserow[j]] = 0;
742       }
743       for (j=0;j<cnzi;j++) {
744         denserow[sparserow[j]] = 0;
745       }
746       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
747       /*        For now, we will recompute what is needed. */
748       ci[i+1+dof] = ci[i+dof] + cnzi;
749     }
750   }
751   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
752   /* Allocate space for cj, initialize cj, and */
753   /* destroy list of free space and other temporary array(s) */
754   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
755   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
756   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
757 
758   /* Allocate space for ca */
759   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
760   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
761 
762   /* put together the new matrix */
763   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
764 
765   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
766   /* Since these are PETSc arrays, change flags to free them as necessary. */
767   c = (Mat_SeqAIJ *)((*C)->data);
768   c->freedata = PETSC_TRUE;
769   c->nonew    = 0;
770 
771   /* Clean up. */
772   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
773 
774   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 EXTERN_C_END
778 
779 /*
780    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
781 
782    Collective on Mat
783 
784    Input Parameters:
785 +  A - the matrix
786 -  P - the projection matrix
787 
788    Output Parameters:
789 .  C - the product matrix
790 
791    Notes:
   C must have been created by calling MatPtAPSymbolic() and must be destroyed by
   the user using MatDestroy().
794 
795    This routine is currently only implemented for pairs of AIJ matrices and classes
796    which inherit from AIJ.  C will be of type MATAIJ.
797 
798    Level: intermediate
799 
800 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
801 */
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric"
/* Dispatcher for the numeric phase of C = P^T * A * P (see the manual-page
   comment above).  Validates all three matrices, then calls the type-specific
   implementation shared by A and P. */
PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  /* Type-specific implementations looked up from the op tables of A and P;
     they must be the same routine (no mixed-type dispatch yet, see below). */
  PetscErrorCode (*fA)(Mat,Mat,Mat);
  PetscErrorCode (*fP)(Mat,Mat,Mat);

  PetscFunctionBegin;

  /* A must be a valid, preallocated, assembled, unfactored matrix */
  PetscValidHeaderSpecific(A,MAT_COOKIE,1);
  PetscValidType(A,1);
  MatPreallocated(A);
  if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* Same state checks for the projection matrix P */
  PetscValidHeaderSpecific(P,MAT_COOKIE,2);
  PetscValidType(P,2);
  MatPreallocated(P);
  if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* C (normally created by MatPtAPSymbolic()) must also be assembled and unfactored */
  PetscValidHeaderSpecific(C,MAT_COOKIE,3);
  PetscValidType(C,3);
  MatPreallocated(C);
  if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
  if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

  /* Dimension compatibility: C is cols(P) x cols(P); rows(P) = cols(A); A square */
  if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->M);
  if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->M,A->N);
  if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",A->M,A->N);
  if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",P->N,C->N);

  /* For now, we do not dispatch based on the type of A and P */
  /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
  fA = A->ops->ptapnumeric;
  if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
  fP = P->ops->ptapnumeric;
  if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
  if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);

  /* Log the whole numeric phase under the shared MAT_PtAPNumeric event */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = (*fA)(A,P,C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
849 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/* Sequential AIJ implementation of the numeric phase of C = P^T * A * P.
   C must carry the nonzero structure computed by the matching symbolic phase:
   the sparse-axpy loop below assumes every column produced for a row of A*P
   is already present in the corresponding row of C. */
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj and pJ are walking cursors over A's and P's column indices; they are
     advanced (aj++, pJ++) as rows are consumed and are never reset. */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  /* aa and pA are matching value cursors for aj and pJ respectively */
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P: one block holding
     apa (cn MatScalars: dense accumulator), apj (cn ints: column list of the
     current row) and apjdense (cn ints: nonzero flags for apa). */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P: row i of A times the rows of P it references */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        /* first time this column appears in the row: record it in apj */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* The loop ends when all apnzj columns have been matched; it relies on
         the symbolic structure of C containing every column in apj. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P so the workspace is clean for row i+1 */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
931 
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
/* Numeric phase of the local product.  A is MPIAIJ (diagonal part a->A plus
   off-diagonal part a->B); P_loc holds the locally owned rows of P and P_oth
   the off-processor rows needed by this process.  C must carry the structure
   produced by MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local().
   NOTE(review): prstart is not referenced in this routine (the symbolic
   counterpart does use it) - confirm whether it is still needed in the
   signature or kept only for interface symmetry. */
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  /* adj/aoj are walking cursors over the column indices of the diagonal and
     off-diagonal parts of A; they advance as rows are consumed */
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  /* ada/aoa are the value cursors matching adj/aoj; pA matches pJ */
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  /* Lazily register the profiling event on first use */
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P: apa (dense
     accumulator), apj (column list) and apjdense (nonzero flags) in one block */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A: local columns index rows of P_loc */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A: columns index rows of P_oth.  Both branches
       set prow = *aoj; the if/else only validates that the global column of
       an off-diagonal entry really lies outside [cstart,cend). */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Terminates when all apnzj columns have been matched against row crow of C */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P so the workspace is clean for row i+1 */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1049 static PetscEvent logkey_matptapsymbolic_local = 0;
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1052 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1053 {
1054   PetscErrorCode ierr;
1055   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1056   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1057   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1058   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1059   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1060   PetscInt       *ci,*cj,*ptaj;
1061   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1062   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1063   PetscInt       pm=P_loc->m,nlnk,*lnk;
1064   MatScalar      *ca;
1065   PetscBT        lnkbt;
1066   PetscInt       prend,nprow_loc,nprow_oth;
1067   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1068   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1069   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1070 
1071   PetscFunctionBegin;
1072   if (!logkey_matptapsymbolic_local) {
1073     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1074   }
1075   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1076 
1077   prend = prstart + pm;
1078 
1079   /* get ij structure of P_loc^T */
1080   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1081   ptJ=ptj;
1082 
1083   /* Allocate ci array, arrays for fill computation and */
1084   /* free space for accumulating nonzero column info */
1085   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1086   ci[0] = 0;
1087 
1088   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1089   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
1090   ptasparserow_loc = ptadenserow_loc + aN;
1091   ptadenserow_oth  = ptasparserow_loc + aN;
1092   ptasparserow_oth = ptadenserow_oth + aN;
1093 
1094   /* create and initialize a linked list */
1095   nlnk = pN+1;
1096   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1097 
1098   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1099   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1100   nnz           = adi[am] + aoi[am];
1101   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1102   current_space = free_space;
1103 
1104   /* determine symbolic info for each row of C: */
1105   for (i=0;i<pN;i++) {
1106     ptnzi  = pti[i+1] - pti[i];
1107     nprow_loc = 0; nprow_oth = 0;
1108     /* i-th row of symbolic P_loc^T*A_loc: */
1109     for (j=0;j<ptnzi;j++) {
1110       arow = *ptJ++;
1111       /* diagonal portion of A */
1112       anzj = adi[arow+1] - adi[arow];
1113       ajj  = adj + adi[arow];
1114       for (k=0;k<anzj;k++) {
1115         col = ajj[k]+prstart;
1116         if (!ptadenserow_loc[col]) {
1117           ptadenserow_loc[col]    = -1;
1118           ptasparserow_loc[nprow_loc++] = col;
1119         }
1120       }
1121       /* off-diagonal portion of A */
1122       anzj = aoi[arow+1] - aoi[arow];
1123       ajj  = aoj + aoi[arow];
1124       for (k=0;k<anzj;k++) {
1125         col = a->garray[ajj[k]];  /* global col */
1126         if (col < cstart){
1127           col = ajj[k];
1128         } else if (col >= cend){
1129           col = ajj[k] + pm;
1130         } else {
1131           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1132         }
1133         if (!ptadenserow_oth[col]) {
1134           ptadenserow_oth[col]    = -1;
1135           ptasparserow_oth[nprow_oth++] = col;
1136         }
1137       }
1138     }
1139 
1140     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1141     cnzi   = 0;
1142     /* rows of P_loc */
1143     ptaj = ptasparserow_loc;
1144     for (j=0; j<nprow_loc; j++) {
1145       prow = *ptaj++;
1146       prow -= prstart; /* rm */
1147       pnzj = pi_loc[prow+1] - pi_loc[prow];
1148       pjj  = pj_loc + pi_loc[prow];
1149       /* add non-zero cols of P into the sorted linked list lnk */
1150       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1151       cnzi += nlnk;
1152     }
1153     /* rows of P_oth */
1154     ptaj = ptasparserow_oth;
1155     for (j=0; j<nprow_oth; j++) {
1156       prow = *ptaj++;
1157       if (prow >= prend) prow -= pm; /* rm */
1158       pnzj = pi_oth[prow+1] - pi_oth[prow];
1159       pjj  = pj_oth + pi_oth[prow];
1160       /* add non-zero cols of P into the sorted linked list lnk */
1161       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1162       cnzi += nlnk;
1163     }
1164 
1165     /* If free space is not available, make more free space */
1166     /* Double the amount of total space in the list */
1167     if (current_space->local_remaining<cnzi) {
1168       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1169     }
1170 
1171     /* Copy data into free space, and zero out denserows */
1172     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1173     current_space->array           += cnzi;
1174     current_space->local_used      += cnzi;
1175     current_space->local_remaining -= cnzi;
1176 
1177     for (j=0;j<nprow_loc; j++) {
1178       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1179     }
1180     for (j=0;j<nprow_oth; j++) {
1181       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1182     }
1183 
1184     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1185     /*        For now, we will recompute what is needed. */
1186     ci[i+1] = ci[i] + cnzi;
1187   }
1188   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1189   /* Allocate space for cj, initialize cj, and */
1190   /* destroy list of free space and other temporary array(s) */
1191   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1192   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1193   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1194   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1195 
1196   /* Allocate space for ca */
1197   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1198   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1199 
1200   /* put together the new matrix */
1201   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1202 
1203   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1204   /* Since these are PETSc arrays, change flags to free them as necessary. */
1205   c = (Mat_SeqAIJ *)((*C)->data);
1206   c->freedata = PETSC_TRUE;
1207   c->nonew    = 0;
1208 
1209   /* Clean up. */
1210   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1211 
1212   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215