xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision d20bfe6f30cee1ec1490c5d7eb71afa766e1d3aa)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatPtAP"
12 /*@
13    MatPtAP - Creates the matrix projection C = P^T * A * P
14 
15    Collective on Mat
16 
17    Input Parameters:
18 +  A - the matrix
19 .  P - the projection matrix
20 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
21 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
22 
23    Output Parameters:
24 .  C - the product matrix
25 
26    Notes:
27    C will be created and must be destroyed by the user with MatDestroy().
28 
29    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
30    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
31 
32    Level: intermediate
33 
34 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
35 @*/
36 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
37   PetscErrorCode ierr;
38   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
39   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
40 
41   PetscFunctionBegin;
42   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
43   PetscValidType(A,1);
44   MatPreallocated(A);
45   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
46   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
47   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
48   PetscValidType(P,2);
49   MatPreallocated(P);
50   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
51   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
52   PetscValidPointer(C,3);
53 
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   /* For now, we do not dispatch based on the type of A and P */
59   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
60   fA = A->ops->ptap;
61   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
62   fP = P->ops->ptap;
63   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
64   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
65 
66   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
67   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
68   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "MatPtAP_MPIAIJ_MPIAIJ"
/*
   MatPtAP_MPIAIJ_MPIAIJ - work-in-progress parallel PtAP.

   The entire body is compiled out unless TMP is defined; in that case the
   routine returns success WITHOUT setting *C, so callers must not use the
   result until this is implemented.
*/
PetscErrorCode MatPtAP_MPIAIJ_MPIAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
#ifdef TMP
  PetscErrorCode    ierr;
  Mat               C_mpi,AP_seq,P_seq,P_subseq,*psubseq;
  Mat_MPIAIJ        *p = (Mat_MPIAIJ*)P->data;
  Mat_MatMatMultMPI *mult;
  int               i,prow,prstart,prend,m=P->m,pncols;
  const int         *pcols;
  const PetscScalar *pvals;
  PetscMPIInt       rank;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  /* Reuse the MatMatMult symbolic machinery to get the sequential pieces
     (local P, local A*P) gathered onto this process */
  ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,P,fill,&C_mpi);CHKERRQ(ierr);
  mult = (Mat_MatMatMultMPI*)C_mpi->spptr;
  P_seq   = mult->bseq[0];
  AP_seq  = mult->C_seq;
  prstart = mult->brstart;
  prend   = prstart + m;
  /* NOTE(review): debugging printf; return code not CHKERRQ-checked */
  ierr = PetscPrintf(PETSC_COMM_SELF," [%d] prstart: %d, prend: %d, dim of P_seq: %d, %d\n",rank,prstart,prend,P_seq->m,P_seq->n);

  /* get seq matrix P_subseq by taking local rows of P */
  /* NOTE(review): mid-function declarations require C99 or later */
  IS  isrow,iscol;
  ierr = ISCreateStride(PETSC_COMM_SELF,m,prstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,P_seq->n,0,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(P_seq,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&psubseq);CHKERRQ(ierr);
  P_subseq = psubseq[0];
  ierr = PetscPrintf(PETSC_COMM_SELF," [%d] dim of P_subseq: %d, %d\n",rank,P_subseq->m,P_subseq->n);

  /* Compute P_subseq^T*C_seq using outer product (P_loc^T)[:,i]*C_seq[i,:]. */
  /* NOTE(review): loop only gets/restores rows; the actual product is not
     computed yet — this is scaffolding for the eventual implementation */
  for (i=0;i<m;i++) {
    prow = prstart + i;
    ierr = MatGetRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr);
    ierr = MatRestoreRow(P_seq,prow,&pncols,&pcols,&pvals);CHKERRQ(ierr);
  }

  *C = C_mpi; /* to be removed! */
#endif /* TMP */
  PetscFunctionReturn(0);
}
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
119 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
120 {
121   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
122   PetscErrorCode    ierr;
123 
124   PetscFunctionBegin;
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
130 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
131 {
132   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
133   PetscErrorCode    ierr;
134 
135   PetscFunctionBegin;
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
/*
   MatPtAP_SeqAIJ_SeqAIJ - SeqAIJ implementation of MatPtAP(): runs the
   symbolic phase only on MAT_INITIAL_MATRIX (MAT_REUSE_MATRIX reuses the
   structure already stored in *C), then always runs the numeric phase.
*/
PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    /* Build the nonzero structure of C = P^T*A*P */
    ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
    ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
  }
  /* Fill in the numerical values of C (structure must already exist) */
  ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatPtAPSymbolic"
158 /*
159    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
160 
161    Collective on Mat
162 
163    Input Parameters:
164 +  A - the matrix
165 -  P - the projection matrix
166 
167    Output Parameters:
168 .  C - the (i,j) structure of the product matrix
169 
170    Notes:
171    C will be created and must be destroyed by the user with MatDestroy().
172 
173    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
174    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
175    this (i,j) structure by calling MatPtAPNumeric().
176 
177    Level: intermediate
178 
179 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
180 */
181 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
182   PetscErrorCode ierr;
183   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
184   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
185 
186   PetscFunctionBegin;
187 
188   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
189   PetscValidType(A,1);
190   MatPreallocated(A);
191   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
192   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
193 
194   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
195   PetscValidType(P,2);
196   MatPreallocated(P);
197   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
198   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
199 
200   PetscValidPointer(C,3);
201 
202   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
203   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
204 
205   /* For now, we do not dispatch based on the type of A and P */
206   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
207   fA = A->ops->ptapsymbolic;
208   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
209   fP = P->ops->ptapsymbolic;
210   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
211   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
212 
213   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
214   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
215   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
216 
217   PetscFunctionReturn(0);
218 }
219 
/* Per-matrix PtAP state hung off Mat->spptr; freed by MatDestroy_SeqAIJ_PtAP(). */
typedef struct {
  Mat    symAP;   /* saved symbolic A*P product (destroyed with the owning matrix) */
} Mat_PtAPstruct;
223 
224 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
228 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
229 {
230   PetscErrorCode    ierr;
231   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
232 
233   PetscFunctionBegin;
234   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
235   ierr = PetscFree(ptap);CHKERRQ(ierr);
236   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
242 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
243   PetscErrorCode ierr;
244   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
245   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
246   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
247   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
248   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
249   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
250   MatScalar      *ca;
251 
252   PetscFunctionBegin;
253   /* Get ij structure of P^T */
254   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
255   ptJ=ptj;
256 
257   /* Allocate ci array, arrays for fill computation and */
258   /* free space for accumulating nonzero column info */
259   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
260   ci[0] = 0;
261 
262   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
263   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
264   ptasparserow = ptadenserow  + an;
265   denserow     = ptasparserow + an;
266   sparserow    = denserow     + pn;
267 
268   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
269   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
270   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
271   current_space = free_space;
272 
273   /* Determine symbolic info for each row of C: */
274   for (i=0;i<pn;i++) {
275     ptnzi  = pti[i+1] - pti[i];
276     ptanzi = 0;
277     /* Determine symbolic row of PtA: */
278     for (j=0;j<ptnzi;j++) {
279       arow = *ptJ++;
280       anzj = ai[arow+1] - ai[arow];
281       ajj  = aj + ai[arow];
282       for (k=0;k<anzj;k++) {
283         if (!ptadenserow[ajj[k]]) {
284           ptadenserow[ajj[k]]    = -1;
285           ptasparserow[ptanzi++] = ajj[k];
286         }
287       }
288     }
289       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
290     ptaj = ptasparserow;
291     cnzi   = 0;
292     for (j=0;j<ptanzi;j++) {
293       prow = *ptaj++;
294       pnzj = pi[prow+1] - pi[prow];
295       pjj  = pj + pi[prow];
296       for (k=0;k<pnzj;k++) {
297         if (!denserow[pjj[k]]) {
298             denserow[pjj[k]]  = -1;
299             sparserow[cnzi++] = pjj[k];
300         }
301       }
302     }
303 
304     /* sort sparserow */
305     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
306 
307     /* If free space is not available, make more free space */
308     /* Double the amount of total space in the list */
309     if (current_space->local_remaining<cnzi) {
310       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
311     }
312 
313     /* Copy data into free space, and zero out denserows */
314     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
315     current_space->array           += cnzi;
316     current_space->local_used      += cnzi;
317     current_space->local_remaining -= cnzi;
318 
319     for (j=0;j<ptanzi;j++) {
320       ptadenserow[ptasparserow[j]] = 0;
321     }
322     for (j=0;j<cnzi;j++) {
323       denserow[sparserow[j]] = 0;
324     }
325       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
326       /*        For now, we will recompute what is needed. */
327     ci[i+1] = ci[i] + cnzi;
328   }
329   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
330   /* Allocate space for cj, initialize cj, and */
331   /* destroy list of free space and other temporary array(s) */
332   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
333   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
334   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
335 
336   /* Allocate space for ca */
337   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
338   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
339 
340   /* put together the new matrix */
341   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
342 
343   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
344   /* Since these are PETSc arrays, change flags to free them as necessary. */
345   c = (Mat_SeqAIJ *)((*C)->data);
346   c->freedata = PETSC_TRUE;
347   c->nonew    = 0;
348 
349   /* Clean up. */
350   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
351 
352   PetscFunctionReturn(0);
353 }
354 
355 #include "src/mat/impls/maij/maij.h"
356 EXTERN_C_BEGIN
357 #undef __FUNCT__
358 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
359 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
360   /* This routine requires testing -- I don't think it works. */
361   PetscErrorCode ierr;
362   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
363   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
364   Mat            P=pp->AIJ;
365   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
366   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
367   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
368   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
369   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
370   MatScalar      *ca;
371 
372   PetscFunctionBegin;
373   /* Start timer */
374   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
375 
376   /* Get ij structure of P^T */
377   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
378 
379   /* Allocate ci array, arrays for fill computation and */
380   /* free space for accumulating nonzero column info */
381   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
382   ci[0] = 0;
383 
384   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
385   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
386   ptasparserow = ptadenserow  + an;
387   denserow     = ptasparserow + an;
388   sparserow    = denserow     + pn;
389 
390   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
391   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
392   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
393   current_space = free_space;
394 
395   /* Determine symbolic info for each row of C: */
396   for (i=0;i<pn/ppdof;i++) {
397     ptnzi  = pti[i+1] - pti[i];
398     ptanzi = 0;
399     ptJ    = ptj + pti[i];
400     for (dof=0;dof<ppdof;dof++) {
401     /* Determine symbolic row of PtA: */
402       for (j=0;j<ptnzi;j++) {
403         arow = ptJ[j] + dof;
404         anzj = ai[arow+1] - ai[arow];
405         ajj  = aj + ai[arow];
406         for (k=0;k<anzj;k++) {
407           if (!ptadenserow[ajj[k]]) {
408             ptadenserow[ajj[k]]    = -1;
409             ptasparserow[ptanzi++] = ajj[k];
410           }
411         }
412       }
413       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
414       ptaj = ptasparserow;
415       cnzi   = 0;
416       for (j=0;j<ptanzi;j++) {
417         pdof = *ptaj%dof;
418         prow = (*ptaj++)/dof;
419         pnzj = pi[prow+1] - pi[prow];
420         pjj  = pj + pi[prow];
421         for (k=0;k<pnzj;k++) {
422           if (!denserow[pjj[k]+pdof]) {
423             denserow[pjj[k]+pdof] = -1;
424             sparserow[cnzi++]     = pjj[k]+pdof;
425           }
426         }
427       }
428 
429       /* sort sparserow */
430       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
431 
432       /* If free space is not available, make more free space */
433       /* Double the amount of total space in the list */
434       if (current_space->local_remaining<cnzi) {
435         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
436       }
437 
438       /* Copy data into free space, and zero out denserows */
439       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
440       current_space->array           += cnzi;
441       current_space->local_used      += cnzi;
442       current_space->local_remaining -= cnzi;
443 
444       for (j=0;j<ptanzi;j++) {
445         ptadenserow[ptasparserow[j]] = 0;
446       }
447       for (j=0;j<cnzi;j++) {
448         denserow[sparserow[j]] = 0;
449       }
450       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
451       /*        For now, we will recompute what is needed. */
452       ci[i+1+dof] = ci[i+dof] + cnzi;
453     }
454   }
455   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
456   /* Allocate space for cj, initialize cj, and */
457   /* destroy list of free space and other temporary array(s) */
458   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
459   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
460   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
461 
462   /* Allocate space for ca */
463   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
464   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
465 
466   /* put together the new matrix */
467   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
468 
469   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
470   /* Since these are PETSc arrays, change flags to free them as necessary. */
471   c = (Mat_SeqAIJ *)((*C)->data);
472   c->freedata = PETSC_TRUE;
473   c->nonew    = 0;
474 
475   /* Clean up. */
476   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
477 
478   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 EXTERN_C_END
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatPtAPNumeric"
485 /*
486    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
487 
488    Collective on Mat
489 
490    Input Parameters:
491 +  A - the matrix
492 -  P - the projection matrix
493 
494    Output Parameters:
495 .  C - the product matrix
496 
497    Notes:
498    C must have been created by calling MatPtAPSymbolic and must be destroyed by
   the user using MatDestroy().
500 
501    This routine is currently only implemented for pairs of AIJ matrices and classes
502    which inherit from AIJ.  C will be of type MATAIJ.
503 
504    Level: intermediate
505 
506 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
507 */
508 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
509   PetscErrorCode ierr;
510   PetscErrorCode (*fA)(Mat,Mat,Mat);
511   PetscErrorCode (*fP)(Mat,Mat,Mat);
512 
513   PetscFunctionBegin;
514 
515   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
516   PetscValidType(A,1);
517   MatPreallocated(A);
518   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
519   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
520 
521   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
522   PetscValidType(P,2);
523   MatPreallocated(P);
524   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
525   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
526 
527   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
528   PetscValidType(C,3);
529   MatPreallocated(C);
530   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
531   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
532 
533   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
534   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
535   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
536   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
537 
538   /* For now, we do not dispatch based on the type of A and P */
539   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
540   fA = A->ops->ptapnumeric;
541   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
542   fP = P->ops->ptapnumeric;
543   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
544   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
545 
546   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
547   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
548   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
549 
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
/*
   MatPtAPNumeric_SeqAIJ_SeqAIJ - fills in the values of C = P^T*A*P for
   SeqAIJ matrices, given that the nonzero structure of C was already built
   by MatPtAPSymbolic_SeqAIJ_SeqAIJ().

   Strategy: for each row i of A, form the sparse row (A*P)[i,:] in a dense
   scratch array, then scatter it into C via the outer product
   (P^T)[:,j]*(A*P)[j,:] using a merge-scan sparse axpy.

   Note the pointer-marching style: aj/aa and pJ/pA are advanced element by
   element across the ENTIRE matrix over the course of the i loop, so the
   increments (aa++, pJ++, pA++) must stay exactly where they are.
*/
PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
{
  PetscErrorCode ierr;
  int            flops=0;
  Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
  Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
  Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
  int            *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
  int            *ci=c->i,*cj=c->j,*cjj;
  int            am=A->M,cn=C->N,cm=C->M;
  int            i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
  MatScalar      *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  /* Single allocation carved into: apa (cn scalars, dense values),
     apj (cn ints, sparse column list), apjdense (cn ints, occupancy flags) */
  ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
  ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);

  apj      = (int *)(apa + cn);
  apjdense = apj + cn;

  /* Clear old values in C */
  ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);

  for (i=0;i<am;i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i+1] - ai[i];
    apnzj = 0;
    for (j=0;j<anzi;j++) {
      prow = *aj++;
      pnzj = pi[prow+1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k=0;k<pnzj;k++) {
        /* First visit to this column: record it in the sparse list */
        if (!apjdense[pjj[k]]) {
          apjdense[pjj[k]] = -1;
          apj[apnzj++]     = pjj[k];
        }
        apa[pjj[k]] += (*aa)*paj[k];
      }
      flops += 2*pnzj;
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    /* Each nonzero P(i,crow) scatters the row of A*P into row crow of C */
    pnzi = pi[i+1] - pi[i];
    for (j=0;j<pnzi;j++) {
      nextap = 0;
      crow   = *pJ++;
      cjj    = cj + ci[crow];
      caj    = ca + ci[crow];
      /* Perform sparse axpy operation.  Note cjj includes apj. */
      /* Merge-scan: cjj is a superset of apj, both sorted, so k advances
         through cjj until every apj entry has been matched */
      for (k=0;nextap<apnzj;k++) {
        if (cjj[k]==apj[nextap]) {
          caj[k] += (*pA)*apa[apj[nextap++]];
        }
      }
      flops += 2*apnzj;
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j=0;j<apnzj;j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(apa);CHKERRQ(ierr);
  ierr = PetscLogFlops(flops);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
634