xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 097ab81bd3f74b14d4b06ab00c8f3db64f59e618)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatPtAP"
13 /*@
14    MatPtAP - Creates the matrix projection C = P^T * A * P
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the matrix
20 .  P - the projection matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
31    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
36 @*/
37 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 EXTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,PetscReal,Mat*);
75 EXTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat,Mat,Mat,PetscInt,Mat C);
76 
77 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_PtAP"
80 PetscErrorCode MatDestroy_MPIAIJ_PtAP(Mat A)
81 {
82   PetscErrorCode       ierr;
83   Mat_MatMatMultMPI    *ptap;
84   PetscObjectContainer container;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectQuery((PetscObject)A,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
88   if (container) {
89     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
90     ierr = MatDestroy(ptap->B_loc);CHKERRQ(ierr);
91     ierr = MatDestroy(ptap->B_oth);CHKERRQ(ierr);
92     ierr = ISDestroy(ptap->isrowb);CHKERRQ(ierr);
93     ierr = ISDestroy(ptap->iscolb);CHKERRQ(ierr);
94 
95     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
96     ierr = PetscObjectCompose((PetscObject)A,"MatPtAPMPI",0);CHKERRQ(ierr);
97   }
98   ierr = PetscFree(ptap);CHKERRQ(ierr);
99 
100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP_MPIAIJ_MPIAIJ - Computes C = P^T*A*P for parallel AIJ matrices.

   MAT_INITIAL_MATRIX: gathers the needed pieces of P (local rows and the
   off-process rows matching the non-zero columns of the local part of A),
   caches them on P in a "MatPtAPMPI" container for later reuse, and runs the
   symbolic phase (which also fills in the numeric values).
   MAT_REUSE_MATRIX: refreshes the cached pieces of P and reruns only the
   numeric phase into the existing C.

   Side effect: overrides P->ops->destroy so the cached data is freed with P.
   NOTE(review): calling this twice with MAT_INITIAL_MATRIX on the same P
   composes a new container over the old one — presumably a leak; verify.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode       ierr;
  Mat_MatMatMultMPI    *ptap;       /* cached pieces of P and supporting index sets */
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = PetscNew(Mat_MatMatMultMPI,&ptap);CHKERRQ(ierr);

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

    /* attach the supporting struct to P for reuse */
    P->ops->destroy  = MatDestroy_MPIAIJ_PtAP;
    ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscObjectContainerSetPointer(container,ptap);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)P,"MatPtAPMPI",(PetscObject)container);CHKERRQ(ierr);

    /* now, compute symbolic local P^T*A*P */
    ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ(A,P,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: the cached struct must already be attached to P */
    ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
    }

    /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */
    ierr = MatGetBrowsOfAoCols(A,P,scall,&ptap->isrowb,&ptap->iscolb,&ptap->brstart,&ptap->B_oth);CHKERRQ(ierr);

    /* get P_loc by taking all local rows of P */
    ierr = MatGetLocalMat(P,scall,&ptap->B_loc);CHKERRQ(ierr);

  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
  }
  /* now, compute numeric local P^T*A*P */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ"
159 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
160 {
161   PetscErrorCode       ierr;
162   Mat                  C_seq;
163   Mat_MatMatMultMPI    *ptap;
164   PetscObjectContainer container;
165 
166   PetscFunctionBegin;
167 
168   ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&container);CHKERRQ(ierr);
169   if (container) {
170     ierr  = PetscObjectContainerGetPointer(container,(void **)&ptap);CHKERRQ(ierr);
171   } else {
172     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
173   }
174   /* compute C_seq = P_loc^T * A_loc * P_seq */
175   ierr = MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,fill,&C_seq);CHKERRQ(ierr);
176 
177   /* add C_seq into mpi C */
178   ierr = MatMerge_SeqsToMPISymbolic(A->comm,C_seq,P->n,P->n,C);CHKERRQ(ierr);
179 
180   PetscFunctionReturn(0);
181 }
182 
183 #undef __FUNCT__
184 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ"
/*
   MatPtAPNumeric_MPIAIJ_MPIAIJ - Numeric phase of the parallel C = P^T*A*P.

   Two stages:
   1) Each process recomputes its sequential contribution
      C_seq = P_loc^T * (A_loc * P), using P_loc (locally owned rows of P)
      and P_oth (off-process rows of P) cached on P under "MatPtAPMPI".
   2) The per-process C_seq values are exchanged (rows owned by other
      processes are sent via MPI) and accumulated into the distributed C,
      driven by the merge metadata attached to C under "MatMergeSeqsToMPI".

   Requires: MAT_INITIAL_MATRIX pass of MatPtAP_MPIAIJ_MPIAIJ has already
   attached both containers.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;     /* merge metadata attached to C */
  Mat_MatMatMultMPI    *ptap;      /* cached P_loc/P_oth attached to P */
  PetscObjectContainer cont_merge,cont_ptap;

  PetscInt       flops=0;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  /* ad/ao: diagonal and off-diagonal sequential blocks of the local part of A */
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat            C_seq;
  Mat_SeqAIJ     *c,*p_loc,*p_oth;
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pJ,*pjj;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  PetscInt       *cjj;
  /* ada/aoa and adj/aoj are advanced as cursors through A's CSR arrays below */
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*cseqa,*caj;
  MatScalar      *pa_loc,*pA,*pa_oth;
  PetscInt       am=A->m,cN=C->N;

  MPI_Comm             comm=C->comm;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             *owners;
  PetscInt             proc;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             cseqnzi,*bj_i,*bi,*bj,cseqrow,bnzi,nextcseqj; /* bi, bj, ba: for C (mpi mat) */
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextcseqi;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            **abuf_r,*ba_i;
  Mat_SeqAIJ           *cseq;
  PetscInt             *cseqi,*cseqj;

  PetscFunctionBegin;
  /* fetch the merge metadata attached to C by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)C,"MatMergeSeqsToMPI",(PetscObject *)&cont_merge);CHKERRQ(ierr);
  if (cont_merge) {
    ierr  = PetscObjectContainerGetPointer(cont_merge,(void **)&merge);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix C does not posses an object container");
  }
  /* fetch the cached pieces of P attached by MatPtAP_MPIAIJ_MPIAIJ */
  ierr = PetscObjectQuery((PetscObject)P,"MatPtAPMPI",(PetscObject *)&cont_ptap);CHKERRQ(ierr);
  if (cont_ptap) {
    ierr  = PetscObjectContainerGetPointer(cont_ptap,(void **)&ptap);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
  }
#ifdef OLD
  ierr = MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(A,ptap->B_loc,ptap->B_oth,ptap->brstart,merge->C_seq);CHKERRQ(ierr);
#endif /* OLD */
   /*--------------------------------------------------------------*/

  /* CSR arrays of the cached local and off-process rows of P */
  p_loc=(Mat_SeqAIJ*)(ptap->B_loc)->data;
  p_oth=(Mat_SeqAIJ*)(ptap->B_oth)->data;
  pi_loc=p_loc->i; pj_loc=p_loc->j; pJ=pj_loc; pa_loc=p_loc->a; pA=pa_loc;
  pi_oth=p_oth->i; pj_oth=p_oth->j; pa_oth=p_oth->a;

  /* C_seq: this process's sequential P^T*A*P, reused across numeric calls */
  C_seq=merge->C_seq;
  cseq=(Mat_SeqAIJ*)C_seq->data;
  cseqi=cseq->i; cseqj=cseq->j;
  cseqa=cseq->a;

  /* Allocate temporary array for storage of one row of A*P */
  /* layout: apa (cN scalars) | apj (cN ints, col list) | apjdense (cN ints, occupancy flags) */
  ierr = PetscMalloc(cN*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cN*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cN);
  apjdense = apj + cN;

  /* Clear old values in C_Seq */
  /* NOTE(review): cseqi is C_seq's row pointer array, indexed here by cN (global
     column count of C) — assumes C_seq has cN rows; verify against symbolic phase */
  ierr = PetscMemzero(cseqa,cseqi[cN]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      /* prow: local row of P_loc hit by this entry of A's diagonal block */
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        /* first touch of this column: record it in the sparse col list apj */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      /* garray maps the local off-diagonal column index to a global column */
      col = a->garray[*aoj];
      /* a genuinely off-diagonal column must lie outside [cstart,cend);
         the corresponding row of P lives in P_oth, indexed by the local index */
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      /* crow: target row of C_seq = column index of P_loc in row i */
      crow   = *pJ++;
      cjj    = cseqj + cseqi[crow];
      caj    = cseqa + cseqi[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* the merge scan below relies on both cjj and apj being sorted */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C_seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

#ifdef OLD1
  ierr = MatMerge_SeqsToMPINumeric(merge->C_seq,C);CHKERRQ(ierr);
#endif /* OLD1 */
  /*--------------------------------------------------------------*/
  /* Stage 2: merge the per-process C_seq values into the distributed C */

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);

  /* send and recv matrix values */
  /*-----------------------------*/
  len_s  = merge->len_s;
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* send the block of C_seq values whose rows are owned by 'proc' */
    i = owners[proc];
    ierr = MPI_Isend(cseqa+cseqi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(cN*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  /* one allocation split into three cursor arrays over the received i-structures */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextcseqi  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextcseqi[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  for (i=0; i<C->m; i++) {
    /* cseqrow: global row index of local row i of C */
    cseqrow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of C */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's C_seq into ba */
    cseqnzi = cseqi[cseqrow+1] - cseqi[cseqrow];
    cseqj   = cseq->j + cseqi[cseqrow];
    cseqa   = cseq->a + cseqi[cseqrow];
    nextcseqj = 0;
    /* merge scan: both bj_i and cseqj are sorted, cseqj is a subset of bj_i */
    for (j=0; nextcseqj<cseqnzi; j++){
      if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
        ba_i[j] += cseqa[nextcseqj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        cseqnzi = *(nextcseqi[k]+1) - *nextcseqi[k];
        cseqj   = buf_rj[k] + *(nextcseqi[k]);
        cseqa   = abuf_r[k] + *(nextcseqi[k]);
        nextcseqj = 0;
        for (j=0; nextcseqj<cseqnzi; j++){
          if (*(bj_i + j) == cseqj[nextcseqj]){ /* bcol == cseqcol */
            ba_i[j] += cseqa[nextcseqj++];
          }
        }
        nextrow[k]++; nextcseqi[k]++;
      }
    }
    ierr = MatSetValues(C,1,&cseqrow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
433 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
434 {
435   PetscErrorCode ierr;
436 
437   PetscFunctionBegin;
438   if (scall == MAT_INITIAL_MATRIX){
439     ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
440     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
441     ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
442   }
443   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
444   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
445   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 /*
450    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
451 
452    Collective on Mat
453 
454    Input Parameters:
455 +  A - the matrix
456 -  P - the projection matrix
457 
458    Output Parameters:
459 .  C - the (i,j) structure of the product matrix
460 
461    Notes:
462    C will be created and must be destroyed by the user with MatDestroy().
463 
464    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
465    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
466    this (i,j) structure by calling MatPtAPNumeric().
467 
468    Level: intermediate
469 
470 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
471 */
472 #undef __FUNCT__
473 #define __FUNCT__ "MatPtAPSymbolic"
474 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C)
475 {
476   PetscErrorCode ierr;
477   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
478   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
479 
480   PetscFunctionBegin;
481 
482   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
483   PetscValidType(A,1);
484   MatPreallocated(A);
485   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
486   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
487 
488   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
489   PetscValidType(P,2);
490   MatPreallocated(P);
491   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
492   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
493 
494   PetscValidPointer(C,3);
495 
496   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
497   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
498 
499   /* For now, we do not dispatch based on the type of A and P */
500   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
501   fA = A->ops->ptapsymbolic;
502   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
503   fP = P->ops->ptapsymbolic;
504   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
505   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
506 
507   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
508   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
509   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
510 
511   PetscFunctionReturn(0);
512 }
513 
/* Support data attached to a SeqAIJ product matrix by the PtAP routines. */
typedef struct {
  Mat    symAP;   /* presumably the cached symbolic A*P intermediate — TODO confirm against users of this struct */
} Mat_PtAPstruct;
517 
518 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
522 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
523 {
524   PetscErrorCode    ierr;
525   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
526 
527   PetscFunctionBegin;
528   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
529   ierr = PetscFree(ptap);CHKERRQ(ierr);
530   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
531   PetscFunctionReturn(0);
532 }
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
536 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C)
537 {
538   PetscErrorCode ierr;
539   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
540   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*p = (Mat_SeqAIJ*)P->data,*c;
541   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
542   PetscInt       *ci,*cj,*ptadenserow,*ptasparserow,*ptaj;
543   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M;
544   PetscInt       i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,nlnk,*lnk;
545   MatScalar      *ca;
546   PetscBT        lnkbt;
547 
548   PetscFunctionBegin;
549   /* Get ij structure of P^T */
550   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
551   ptJ=ptj;
552 
553   /* Allocate ci array, arrays for fill computation and */
554   /* free space for accumulating nonzero column info */
555   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
556   ci[0] = 0;
557 
558   ierr = PetscMalloc((2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
559   ierr = PetscMemzero(ptadenserow,(2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
560   ptasparserow = ptadenserow  + an;
561 
562   /* create and initialize a linked list */
563   nlnk = pn+1;
564   ierr = PetscLLCreate(pn,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
565 
566   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
567   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
568   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
569   current_space = free_space;
570 
571   /* Determine symbolic info for each row of C: */
572   for (i=0;i<pn;i++) {
573     ptnzi  = pti[i+1] - pti[i];
574     ptanzi = 0;
575     /* Determine symbolic row of PtA: */
576     for (j=0;j<ptnzi;j++) {
577       arow = *ptJ++;
578       anzj = ai[arow+1] - ai[arow];
579       ajj  = aj + ai[arow];
580       for (k=0;k<anzj;k++) {
581         if (!ptadenserow[ajj[k]]) {
582           ptadenserow[ajj[k]]    = -1;
583           ptasparserow[ptanzi++] = ajj[k];
584         }
585       }
586     }
587       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
588     ptaj = ptasparserow;
589     cnzi   = 0;
590     for (j=0;j<ptanzi;j++) {
591       prow = *ptaj++;
592       pnzj = pi[prow+1] - pi[prow];
593       pjj  = pj + pi[prow];
594       /* add non-zero cols of P into the sorted linked list lnk */
595       ierr = PetscLLAdd(pnzj,pjj,pn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
596       cnzi += nlnk;
597     }
598 
599     /* If free space is not available, make more free space */
600     /* Double the amount of total space in the list */
601     if (current_space->local_remaining<cnzi) {
602       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
603     }
604 
605     /* Copy data into free space, and zero out denserows */
606     ierr = PetscLLClean(pn,pn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
607     current_space->array           += cnzi;
608     current_space->local_used      += cnzi;
609     current_space->local_remaining -= cnzi;
610 
611     for (j=0;j<ptanzi;j++) {
612       ptadenserow[ptasparserow[j]] = 0;
613     }
614     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
615     /*        For now, we will recompute what is needed. */
616     ci[i+1] = ci[i] + cnzi;
617   }
618   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
619   /* Allocate space for cj, initialize cj, and */
620   /* destroy list of free space and other temporary array(s) */
621   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
622   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
623   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
624   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
625 
626   /* Allocate space for ca */
627   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
628   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
629 
630   /* put together the new matrix */
631   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
632 
633   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
634   /* Since these are PETSc arrays, change flags to free them as necessary. */
635   c = (Mat_SeqAIJ *)((*C)->data);
636   c->freedata = PETSC_TRUE;
637   c->nonew    = 0;
638 
639   /* Clean up. */
640   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
641 
642   PetscFunctionReturn(0);
643 }
644 
645 #include "src/mat/impls/maij/maij.h"
646 EXTERN_C_BEGIN
647 #undef __FUNCT__
648 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
649 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C)
650 {
651   /* This routine requires testing -- I don't think it works. */
652   PetscErrorCode ierr;
653   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
654   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
655   Mat            P=pp->AIJ;
656   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
657   PetscInt       *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
658   PetscInt       *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
659   PetscInt       an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
660   PetscInt       i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
661   MatScalar      *ca;
662 
663   PetscFunctionBegin;
664   /* Start timer */
665   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
666 
667   /* Get ij structure of P^T */
668   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
669 
670   /* Allocate ci array, arrays for fill computation and */
671   /* free space for accumulating nonzero column info */
672   ierr = PetscMalloc((pn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
673   ci[0] = 0;
674 
675   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr);
676   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr);
677   ptasparserow = ptadenserow  + an;
678   denserow     = ptasparserow + an;
679   sparserow    = denserow     + pn;
680 
681   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
682   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
683   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
684   current_space = free_space;
685 
686   /* Determine symbolic info for each row of C: */
687   for (i=0;i<pn/ppdof;i++) {
688     ptnzi  = pti[i+1] - pti[i];
689     ptanzi = 0;
690     ptJ    = ptj + pti[i];
691     for (dof=0;dof<ppdof;dof++) {
692     /* Determine symbolic row of PtA: */
693       for (j=0;j<ptnzi;j++) {
694         arow = ptJ[j] + dof;
695         anzj = ai[arow+1] - ai[arow];
696         ajj  = aj + ai[arow];
697         for (k=0;k<anzj;k++) {
698           if (!ptadenserow[ajj[k]]) {
699             ptadenserow[ajj[k]]    = -1;
700             ptasparserow[ptanzi++] = ajj[k];
701           }
702         }
703       }
704       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
705       ptaj = ptasparserow;
706       cnzi   = 0;
707       for (j=0;j<ptanzi;j++) {
708         pdof = *ptaj%dof;
709         prow = (*ptaj++)/dof;
710         pnzj = pi[prow+1] - pi[prow];
711         pjj  = pj + pi[prow];
712         for (k=0;k<pnzj;k++) {
713           if (!denserow[pjj[k]+pdof]) {
714             denserow[pjj[k]+pdof] = -1;
715             sparserow[cnzi++]     = pjj[k]+pdof;
716           }
717         }
718       }
719 
720       /* sort sparserow */
721       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
722 
723       /* If free space is not available, make more free space */
724       /* Double the amount of total space in the list */
725       if (current_space->local_remaining<cnzi) {
726         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
727       }
728 
729       /* Copy data into free space, and zero out denserows */
730       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr);
731       current_space->array           += cnzi;
732       current_space->local_used      += cnzi;
733       current_space->local_remaining -= cnzi;
734 
735       for (j=0;j<ptanzi;j++) {
736         ptadenserow[ptasparserow[j]] = 0;
737       }
738       for (j=0;j<cnzi;j++) {
739         denserow[sparserow[j]] = 0;
740       }
741       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
742       /*        For now, we will recompute what is needed. */
743       ci[i+1+dof] = ci[i+dof] + cnzi;
744     }
745   }
746   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
747   /* Allocate space for cj, initialize cj, and */
748   /* destroy list of free space and other temporary array(s) */
749   ierr = PetscMalloc((ci[pn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
750   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
751   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
752 
753   /* Allocate space for ca */
754   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
755   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
756 
757   /* put together the new matrix */
758   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
759 
760   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
761   /* Since these are PETSc arrays, change flags to free them as necessary. */
762   c = (Mat_SeqAIJ *)((*C)->data);
763   c->freedata = PETSC_TRUE;
764   c->nonew    = 0;
765 
766   /* Clean up. */
767   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
768 
769   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 EXTERN_C_END
773 
774 /*
775    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
776 
777    Collective on Mat
778 
779    Input Parameters:
780 +  A - the matrix
781 -  P - the projection matrix
782 
783    Output Parameters:
784 .  C - the product matrix
785 
786    Notes:
787    C must have been created by calling MatPtAPSymbolic and must be destroyed by
788    the user using MatDestroy().
789 
790    This routine is currently only implemented for pairs of AIJ matrices and classes
791    which inherit from AIJ.  C will be of type MATAIJ.
792 
793    Level: intermediate
794 
795 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
796 */
797 #undef __FUNCT__
798 #define __FUNCT__ "MatPtAPNumeric"
799 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C)
800 {
801   PetscErrorCode ierr;
802   PetscErrorCode (*fA)(Mat,Mat,Mat);
803   PetscErrorCode (*fP)(Mat,Mat,Mat);
804 
805   PetscFunctionBegin;
806 
807   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
808   PetscValidType(A,1);
809   MatPreallocated(A);
810   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
811   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
812 
813   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
814   PetscValidType(P,2);
815   MatPreallocated(P);
816   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
817   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
818 
819   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
820   PetscValidType(C,3);
821   MatPreallocated(C);
822   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
823   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
824 
825   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
826   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
827   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
828   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
829 
830   /* For now, we do not dispatch based on the type of A and P */
831   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
832   fA = A->ops->ptapnumeric;
833   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
834   fP = P->ops->ptapnumeric;
835   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
836   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
837 
838   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
839   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
840   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
841 
842   PetscFunctionReturn(0);
843 }
844 
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   Numeric phase of C = P^T * A * P for SeqAIJ matrices.  The sparsity pattern
   of C (c->i, c->j) must already have been computed by the symbolic phase and
   must contain every nonzero generated below; only the values c->a are filled.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;   /* flop count reported to PETSc logging at the end */
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  /* aj/aa are advanced in place as rows of A are consumed; pJ/pA walk the
     rows of P in step with the outer loop over rows of A */
  PetscInt       *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->M,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P: apa holds the
     dense scattered values, apj the column indices, apjdense occupancy flags.
     All three are carved out of one zeroed allocation. */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);

  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C so we can accumulate with += below */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row i of A*P by scattering the contributing rows of P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;                      /* column of A == row of P */
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;         /* first touch of this column: record it */
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;                    /* target row of C == column of P in row i */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj, so the loop
         terminates (nextap reaches apnzj) before k runs past row crow of C. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P so apa/apjdense are clean for row i+1 */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
926 
/* Compute local C = P_loc^T * A * P - used by MatPtAP_MPIAIJ_MPIAIJ() */
/* Event key for logging this local numeric phase; registered on first call. */
static PetscEvent logkey_matptapnumeric_local = 0;
#undef __FUNCT__
#define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIAIJ_Local"
/*
   Numeric phase of the local product.  A is MPIAIJ; its diagonal (a->A) and
   off-diagonal (a->B) SeqAIJ parts are combined with P_loc (locally owned rows
   of P) and P_oth (off-process rows of P gathered to this process).  C's
   sparsity must come from MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local.
   NOTE(review): the prstart parameter is not referenced in this routine —
   presumably kept for signature symmetry with the symbolic phase; confirm.
*/
PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       flops=0;   /* flop count reported to PETSc logging */
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
  PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
  /* adj/aoj and ada/aoa are advanced in place as rows of A are consumed */
  PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,*apj,*apjdense,cstart=a->cstart,cend=a->cend,col;
  PetscInt       *pJ=pj_loc,*pjj;
  PetscInt       *ci=c->i,*cj=c->j,*cjj;
  PetscInt       am=A->m,cn=C->N,cm=C->M;
  PetscInt       i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *ada=ad->a,*aoa=ao->a,*apa,*paj,*ca=c->a,*caj;
  MatScalar      *pa_loc=p_loc->a,*pA=pa_loc,*pa_oth=p_oth->a;

  PetscFunctionBegin;
  if (!logkey_matptapnumeric_local) {
    ierr = PetscLogEventRegister(&logkey_matptapnumeric_local,"MatPtAPNumeric_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);

  /* Allocate temporary array for storage of one row of A*P: dense values,
     column indices, and occupancy flags carved out of one zeroed allocation */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
  apj      = (PetscInt *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C so we can accumulate with += below */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form i-th sparse row of A*P */
     apnzj = 0;
    /* diagonal portion of A: its columns index rows of P_loc */
    anzi  = adi[i+1] - adi[i];
    for (j=0;j<anzi;j++) {
      prow = *adj;
      adj++;
      pnzj = pi_loc[prow+1] - pi_loc[prow];
      pjj  = pj_loc + pi_loc[prow];
      paj  = pa_loc + pi_loc[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;   /* first touch of this column: record it */
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*ada)*paj[k];
      }
      flops += 2*pnzj;
      ada++;
    }
    /* off-diagonal portion of A: its (compact) columns index rows of P_oth.
       Both in-range branches assign the same prow; the if/else only validates
       that the global column really lies outside the diagonal block
       [cstart,cend). */
    anzi  = aoi[i+1] - aoi[i];
    for (j=0;j<anzi;j++) {
      col = a->garray[*aoj];
      if (col < cstart){
        prow = *aoj;
      } else if (col >= cend){
        prow = *aoj;
      } else {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
      }
      aoj++;
      pnzj = pi_oth[prow+1] - pi_oth[prow];
      pjj  = pj_oth + pi_oth[prow];
      paj  = pa_oth + pi_oth[prow];
      for (k=0;k<pnzj;k++) {
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aoa)*paj[k];
      }
      flops += 2*pnzj;
      aoa++;
    }
    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P_loc[i,:]^T*AP[i,:] using outer product */
    pnzi = pi_loc[i+1] - pi_loc[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;          /* target row of C == column of P_loc in row i */
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj, so the loop
         terminates before k runs past the end of row crow of C. */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P so the work arrays are clean for i+1 */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_matptapnumeric_local,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1044 static PetscEvent logkey_matptapsymbolic_local = 0;
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local"
1047 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local(Mat A,Mat P_loc,Mat P_oth,PetscInt prstart,PetscReal fill,Mat *C)
1048 {
1049   PetscErrorCode ierr;
1050   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
1051   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
1052   Mat_SeqAIJ     *ad=(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*c;
1053   PetscInt       *pti,*ptj,*ptJ,*ajj,*pjj;
1054   PetscInt       *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,nnz,cstart=a->cstart,cend=a->cend,col;
1055   PetscInt       *ci,*cj,*ptaj;
1056   PetscInt       aN=A->N,am=A->m,pN=P_loc->N;
1057   PetscInt       i,j,k,ptnzi,arow,anzj,prow,pnzj,cnzi;
1058   PetscInt       pm=P_loc->m,nlnk,*lnk;
1059   MatScalar      *ca;
1060   PetscBT        lnkbt;
1061   PetscInt       prend,nprow_loc,nprow_oth;
1062   PetscInt       *ptadenserow_loc,*ptadenserow_oth,*ptasparserow_loc,*ptasparserow_oth;
1063   Mat_SeqAIJ     *p_loc=(Mat_SeqAIJ*)P_loc->data,*p_oth=(Mat_SeqAIJ*)P_oth->data;
1064   PetscInt       *pi_loc=p_loc->i,*pj_loc=p_loc->j,*pi_oth=p_oth->i,*pj_oth=p_oth->j;
1065 
1066   PetscFunctionBegin;
1067   if (!logkey_matptapsymbolic_local) {
1068     ierr = PetscLogEventRegister(&logkey_matptapsymbolic_local,"MatPtAPSymbolic_MPIAIJ_MPIAIJ_Local",MAT_COOKIE);
1069   }
1070   ierr = PetscLogEventBegin(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1071 
1072   prend = prstart + pm;
1073 
1074   /* get ij structure of P_loc^T */
1075   ierr = MatGetSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1076   ptJ=ptj;
1077 
1078   /* Allocate ci array, arrays for fill computation and */
1079   /* free space for accumulating nonzero column info */
1080   ierr = PetscMalloc((pN+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
1081   ci[0] = 0;
1082 
1083   ierr = PetscMalloc((4*aN+1)*sizeof(PetscInt),&ptadenserow_loc);CHKERRQ(ierr);
1084   ierr = PetscMemzero(ptadenserow_loc,(4*aN+1)*sizeof(PetscInt));CHKERRQ(ierr);
1085   ptasparserow_loc = ptadenserow_loc + aN;
1086   ptadenserow_oth  = ptasparserow_loc + aN;
1087   ptasparserow_oth = ptadenserow_oth + aN;
1088 
1089   /* create and initialize a linked list */
1090   nlnk = pN+1;
1091   ierr = PetscLLCreate(pN,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1092 
1093   /* Set initial free space to be nnz(A) scaled by fill*P->N/P->M. */
1094   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
1095   nnz           = adi[am] + aoi[am];
1096   ierr          = GetMoreSpace((PetscInt)(fill*nnz*pN/aN+1),&free_space);
1097   current_space = free_space;
1098 
1099   /* determine symbolic info for each row of C: */
1100   for (i=0;i<pN;i++) {
1101     ptnzi  = pti[i+1] - pti[i];
1102     nprow_loc = 0; nprow_oth = 0;
1103     /* i-th row of symbolic P_loc^T*A_loc: */
1104     for (j=0;j<ptnzi;j++) {
1105       arow = *ptJ++;
1106       /* diagonal portion of A */
1107       anzj = adi[arow+1] - adi[arow];
1108       ajj  = adj + adi[arow];
1109       for (k=0;k<anzj;k++) {
1110         col = ajj[k]+prstart;
1111         if (!ptadenserow_loc[col]) {
1112           ptadenserow_loc[col]    = -1;
1113           ptasparserow_loc[nprow_loc++] = col;
1114         }
1115       }
1116       /* off-diagonal portion of A */
1117       anzj = aoi[arow+1] - aoi[arow];
1118       ajj  = aoj + aoi[arow];
1119       for (k=0;k<anzj;k++) {
1120         col = a->garray[ajj[k]];  /* global col */
1121         if (col < cstart){
1122           col = ajj[k];
1123         } else if (col >= cend){
1124           col = ajj[k] + pm;
1125         } else {
1126           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Off-diagonal portion of A has wrong column map");
1127         }
1128         if (!ptadenserow_oth[col]) {
1129           ptadenserow_oth[col]    = -1;
1130           ptasparserow_oth[nprow_oth++] = col;
1131         }
1132       }
1133     }
1134 
1135     /* using symbolic info of local PtA, determine symbolic info for row of C: */
1136     cnzi   = 0;
1137     /* rows of P_loc */
1138     ptaj = ptasparserow_loc;
1139     for (j=0; j<nprow_loc; j++) {
1140       prow = *ptaj++;
1141       prow -= prstart; /* rm */
1142       pnzj = pi_loc[prow+1] - pi_loc[prow];
1143       pjj  = pj_loc + pi_loc[prow];
1144       /* add non-zero cols of P into the sorted linked list lnk */
1145       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1146       cnzi += nlnk;
1147     }
1148     /* rows of P_oth */
1149     ptaj = ptasparserow_oth;
1150     for (j=0; j<nprow_oth; j++) {
1151       prow = *ptaj++;
1152       if (prow >= prend) prow -= pm; /* rm */
1153       pnzj = pi_oth[prow+1] - pi_oth[prow];
1154       pjj  = pj_oth + pi_oth[prow];
1155       /* add non-zero cols of P into the sorted linked list lnk */
1156       ierr = PetscLLAdd(pnzj,pjj,pN,nlnk,lnk,lnkbt);CHKERRQ(ierr);
1157       cnzi += nlnk;
1158     }
1159 
1160     /* If free space is not available, make more free space */
1161     /* Double the amount of total space in the list */
1162     if (current_space->local_remaining<cnzi) {
1163       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
1164     }
1165 
1166     /* Copy data into free space, and zero out denserows */
1167     ierr = PetscLLClean(pN,pN,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
1168     current_space->array           += cnzi;
1169     current_space->local_used      += cnzi;
1170     current_space->local_remaining -= cnzi;
1171 
1172     for (j=0;j<nprow_loc; j++) {
1173       ptadenserow_loc[ptasparserow_loc[j]] = 0;
1174     }
1175     for (j=0;j<nprow_oth; j++) {
1176       ptadenserow_oth[ptasparserow_oth[j]] = 0;
1177     }
1178 
1179     /* Aside: Perhaps we should save the pta info for the numerical factorization. */
1180     /*        For now, we will recompute what is needed. */
1181     ci[i+1] = ci[i] + cnzi;
1182   }
1183   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
1184   /* Allocate space for cj, initialize cj, and */
1185   /* destroy list of free space and other temporary array(s) */
1186   ierr = PetscMalloc((ci[pN]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr);
1187   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
1188   ierr = PetscFree(ptadenserow_loc);CHKERRQ(ierr);
1189   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1190 
1191   /* Allocate space for ca */
1192   ierr = PetscMalloc((ci[pN]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
1193   ierr = PetscMemzero(ca,(ci[pN]+1)*sizeof(MatScalar));CHKERRQ(ierr);
1194 
1195   /* put together the new matrix */
1196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,pN,pN,ci,cj,ca,C);CHKERRQ(ierr);
1197 
1198   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
1199   /* Since these are PETSc arrays, change flags to free them as necessary. */
1200   c = (Mat_SeqAIJ *)((*C)->data);
1201   c->freedata = PETSC_TRUE;
1202   c->nonew    = 0;
1203 
1204   /* Clean up. */
1205   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P_loc,&pti,&ptj);CHKERRQ(ierr);
1206 
1207   ierr = PetscLogEventEnd(logkey_matptapsymbolic_local,A,P_loc,P_oth,0);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210