xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 634064b45b5c838063ae82f97ffb7e99245dcdb5)
1 /*
2   Defines projective product routines where A is a SeqAIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 
9 EXTERN int MatSeqAIJPtAP(Mat,Mat,Mat*);
10 EXTERN int MatSeqAIJPtAPSymbolic(Mat,Mat,Mat*);
11 EXTERN int MatSeqAIJPtAPNumeric(Mat,Mat,Mat);
12 EXTERN int RegisterMatMatMultRoutines_Private(Mat);
13 
14 static int MATSeqAIJ_PtAP         = 0;
15 static int MATSeqAIJ_PtAPSymbolic = 0;
16 static int MATSeqAIJ_PtAPNumeric  = 0;
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "MatSeqAIJPtAP"
20 /*@
21    MatSeqAIJPtAP - Creates the matrix projection C = P^T * A * P
22 
23    Collective on Mat
24 
25    Input Parameters:
26 +  A - the matrix
27 -  P - the projection matrix
28 
29    Output Parameters:
30 .  C - the product matrix
31 
32    Notes:
33    C will be created and must be destroyed by the user with MatDestroy().
34 
35    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
36    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
37 
38    Level: intermediate
39 
40 .seealso: MatSeqAIJPtAPSymbolic(),MatSeqAIJPtAPNumeric(),MatMatMult()
41 @*/
42 int MatSeqAIJPtAP(Mat A,Mat P,Mat *C) {
43   int ierr;
44   char funct[80];
45   int (*f)(Mat,Mat,Mat);
46 
47   PetscFunctionBegin;
48   ierr = PetscLogEventBegin(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
49 
50   ierr = MatSeqAIJPtAPSymbolic(A,P,C);CHKERRQ(ierr);
51 
52   /* Avoid additional error checking included in */
53 /*   ierr = MatSeqAIJApplyPtAPNumeric(A,P,*C);CHKERRQ(ierr); */
54 
55   /* Currently only _seqaij_seqaij is implemented, so just query for it in A and P. */
56   /* When other implementations exist, attack the multiple dispatch problem. */
57   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
58   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
59   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
60   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
61   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
62 
63   ierr = (*f)(A,P,*C);CHKERRQ(ierr);
64 
65   ierr = PetscLogEventEnd(MATSeqAIJ_PtAP,A,P,0,0);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 #undef __FUNCT__
70 #define __FUNCT__ "MatSeqAIJPtAPSymbolic"
71 /*@
72    MatSeqAIJPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
73 
74    Collective on Mat
75 
76    Input Parameters:
77 +  A - the matrix
78 -  P - the projection matrix
79 
80    Output Parameters:
81 .  C - the (i,j) structure of the product matrix
82 
83    Notes:
84    C will be created and must be destroyed by the user with MatDestroy().
85 
86    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
87    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
88    this (i,j) structure by calling MatSeqAIJPtAPNumeric().
89 
90    Level: intermediate
91 
92 .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPNumeric(),MatMatMultSymbolic()
93 @*/
94 int MatSeqAIJPtAPSymbolic(Mat A,Mat P,Mat *C) {
95   int ierr;
96   char funct[80];
97   int (*f)(Mat,Mat,Mat*);
98 
99   PetscFunctionBegin;
100 
101   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
102   PetscValidType(A,1);
103   MatPreallocated(A);
104   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
105   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
106 
107   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
108   PetscValidType(P,2);
109   MatPreallocated(P);
110   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
111   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
112 
113   PetscValidPointer(C,3);
114 
115   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
116   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
117 
118   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
119   /* When other implementations exist, attack the multiple dispatch problem. */
120   ierr = PetscStrcpy(funct,"MatApplyPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr);
121   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
122   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPSymbolic is not supported for P of type %s",P->type_name);
123   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
124   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPSymbolic is not supported for A of type %s",A->type_name);
125 
126   ierr = (*f)(A,P,C);CHKERRQ(ierr);
127 
128   PetscFunctionReturn(0);
129 }
130 
131 EXTERN_C_BEGIN
132 #undef __FUNCT__
133 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ"
134 int MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C) {
135   int            ierr;
136   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
137   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
138   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
139   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
140   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
141   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
142   MatScalar      *ca;
143 
144   PetscFunctionBegin;
145 
146   /* Start timer */
147   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
148 
149   /* Get ij structure of P^T */
150   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
151   ptJ=ptj;
152 
153   /* Allocate ci array, arrays for fill computation and */
154   /* free space for accumulating nonzero column info */
155   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
156   ci[0] = 0;
157 
158   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
159   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
160   ptasparserow = ptadenserow  + an;
161   denserow     = ptasparserow + an;
162   sparserow    = denserow     + pn;
163 
164   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
165   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
166   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
167   current_space = free_space;
168 
169   /* Determine symbolic info for each row of C: */
170   for (i=0;i<pn;i++) {
171     ptnzi  = pti[i+1] - pti[i];
172     ptanzi = 0;
173     /* Determine symbolic row of PtA: */
174     for (j=0;j<ptnzi;j++) {
175       arow = *ptJ++;
176       anzj = ai[arow+1] - ai[arow];
177       ajj  = aj + ai[arow];
178       for (k=0;k<anzj;k++) {
179         if (!ptadenserow[ajj[k]]) {
180           ptadenserow[ajj[k]]    = -1;
181           ptasparserow[ptanzi++] = ajj[k];
182         }
183       }
184     }
185       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
186     ptaj = ptasparserow;
187     cnzi   = 0;
188     for (j=0;j<ptanzi;j++) {
189       prow = *ptaj++;
190       pnzj = pi[prow+1] - pi[prow];
191       pjj  = pj + pi[prow];
192       for (k=0;k<pnzj;k++) {
193         if (!denserow[pjj[k]]) {
194             denserow[pjj[k]]  = -1;
195             sparserow[cnzi++] = pjj[k];
196         }
197       }
198     }
199 
200     /* sort sparserow */
201     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
202 
203     /* If free space is not available, make more free space */
204     /* Double the amount of total space in the list */
205     if (current_space->local_remaining<cnzi) {
206       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
207     }
208 
209     /* Copy data into free space, and zero out denserows */
210     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
211     current_space->array           += cnzi;
212     current_space->local_used      += cnzi;
213     current_space->local_remaining -= cnzi;
214 
215     for (j=0;j<ptanzi;j++) {
216       ptadenserow[ptasparserow[j]] = 0;
217     }
218     for (j=0;j<cnzi;j++) {
219       denserow[sparserow[j]] = 0;
220     }
221       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
222       /*        For now, we will recompute what is needed. */
223     ci[i+1] = ci[i] + cnzi;
224   }
225   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
226   /* Allocate space for cj, initialize cj, and */
227   /* destroy list of free space and other temporary array(s) */
228   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
229   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
230   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
231 
232   /* Allocate space for ca */
233   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
234   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
235 
236   /* put together the new matrix */
237   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
238 
239   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
240   /* Since these are PETSc arrays, change flags to free them as necessary. */
241   c = (Mat_SeqAIJ *)((*C)->data);
242   c->freedata = PETSC_TRUE;
243   c->nonew    = 0;
244 
245   /* Clean up. */
246   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
247 
248   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 EXTERN_C_END
252 
253 #include "src/mat/impls/maij/maij.h"
254 EXTERN_C_BEGIN
255 #undef __FUNCT__
256 #define __FUNCT__ "MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ"
257 int MatApplyPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
258   /* This routine requires testing -- I don't think it works. */
259   int            ierr;
260   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
261   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
262   Mat            P=pp->AIJ;
263   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
264   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
265   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
266   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
267   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
268   MatScalar      *ca;
269 
270   PetscFunctionBegin;
271   /* Start timer */
272   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
273 
274   /* Get ij structure of P^T */
275   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
276 
277   /* Allocate ci array, arrays for fill computation and */
278   /* free space for accumulating nonzero column info */
279   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
280   ci[0] = 0;
281 
282   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
283   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
284   ptasparserow = ptadenserow  + an;
285   denserow     = ptasparserow + an;
286   sparserow    = denserow     + pn;
287 
288   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
289   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
290   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
291   current_space = free_space;
292 
293   /* Determine symbolic info for each row of C: */
294   for (i=0;i<pn/ppdof;i++) {
295     ptnzi  = pti[i+1] - pti[i];
296     ptanzi = 0;
297     ptJ    = ptj + pti[i];
298     for (dof=0;dof<ppdof;dof++) {
299     /* Determine symbolic row of PtA: */
300       for (j=0;j<ptnzi;j++) {
301         arow = ptJ[j] + dof;
302         anzj = ai[arow+1] - ai[arow];
303         ajj  = aj + ai[arow];
304         for (k=0;k<anzj;k++) {
305           if (!ptadenserow[ajj[k]]) {
306             ptadenserow[ajj[k]]    = -1;
307             ptasparserow[ptanzi++] = ajj[k];
308           }
309         }
310       }
311       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
312       ptaj = ptasparserow;
313       cnzi   = 0;
314       for (j=0;j<ptanzi;j++) {
315         pdof = *ptaj%dof;
316         prow = (*ptaj++)/dof;
317         pnzj = pi[prow+1] - pi[prow];
318         pjj  = pj + pi[prow];
319         for (k=0;k<pnzj;k++) {
320           if (!denserow[pjj[k]+pdof]) {
321             denserow[pjj[k]+pdof] = -1;
322             sparserow[cnzi++]     = pjj[k]+pdof;
323           }
324         }
325       }
326 
327       /* sort sparserow */
328       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
329 
330       /* If free space is not available, make more free space */
331       /* Double the amount of total space in the list */
332       if (current_space->local_remaining<cnzi) {
333         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
334       }
335 
336       /* Copy data into free space, and zero out denserows */
337       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
338       current_space->array           += cnzi;
339       current_space->local_used      += cnzi;
340       current_space->local_remaining -= cnzi;
341 
342       for (j=0;j<ptanzi;j++) {
343         ptadenserow[ptasparserow[j]] = 0;
344       }
345       for (j=0;j<cnzi;j++) {
346         denserow[sparserow[j]] = 0;
347       }
348       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
349       /*        For now, we will recompute what is needed. */
350       ci[i+1+dof] = ci[i+dof] + cnzi;
351     }
352   }
353   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
354   /* Allocate space for cj, initialize cj, and */
355   /* destroy list of free space and other temporary array(s) */
356   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
357   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
358   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
359 
360   /* Allocate space for ca */
361   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
362   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
363 
364   /* put together the new matrix */
365   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
366 
367   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
368   /* Since these are PETSc arrays, change flags to free them as necessary. */
369   c = (Mat_SeqAIJ *)((*C)->data);
370   c->freedata = PETSC_TRUE;
371   c->nonew    = 0;
372 
373   /* Clean up. */
374   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
375 
376   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 EXTERN_C_END
380 
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSeqAIJPtAPNumeric"
383 /*@
384    MatSeqAIJPtAPNumeric - Computes the matrix projection C = P^T * A * P
385 
386    Collective on Mat
387 
388    Input Parameters:
389 +  A - the matrix
390 -  P - the projection matrix
391 
392    Output Parameters:
393 .  C - the product matrix
394 
395    Notes:
396    C must have been created by calling MatSeqAIJPtAPSymbolic and must be destroyed by
397    the user using MatDeatroy().
398 
399    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
400    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
401 
402    Level: intermediate
403 
404 .seealso: MatSeqAIJPtAP(),MatSeqAIJPtAPSymbolic(),MatMatMultNumeric()
405 @*/
406 int MatSeqAIJPtAPNumeric(Mat A,Mat P,Mat C) {
407   int ierr;
408   char funct[80];
409   int (*f)(Mat,Mat,Mat);
410 
411   PetscFunctionBegin;
412 
413   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
414   PetscValidType(A,1);
415   MatPreallocated(A);
416   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
417   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
418 
419   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
420   PetscValidType(P,2);
421   MatPreallocated(P);
422   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
423   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
424 
425   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
426   PetscValidType(C,3);
427   MatPreallocated(C);
428   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
429   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
430 
431   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
432   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
433   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
434   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
435 
436   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
437   /* When other implementations exist, attack the multiple dispatch problem. */
438   ierr = PetscStrcpy(funct,"MatApplyPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
439   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
440   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for P of type %s",P->type_name);
441   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
442   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatSeqAIJPtAPNumeric is not supported for A of type %s",A->type_name);
443 
444   ierr = (*f)(A,P,C);CHKERRQ(ierr);
445 
446   PetscFunctionReturn(0);
447 }
448 
449 EXTERN_C_BEGIN
450 #undef __FUNCT__
451 #define __FUNCT__ "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ"
452 int MatApplyPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
453   int        ierr,flops=0;
454   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
455   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
456   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
457   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
458   int        *ci=c->i,*cj=c->j,*cjj;
459   int        am=A->M,cn=C->N,cm=C->M;
460   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
461   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
462 
463   PetscFunctionBegin;
464   ierr = PetscLogEventBegin(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
465 
466   /* Allocate temporary array for storage of one row of A*P */
467   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
468   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
469 
470   apj      = (int*)(apa + cn);
471   apjdense = apj + cn;
472 
473   /* Clear old values in C */
474   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
475 
476   for (i=0;i<am;i++) {
477     /* Form sparse row of A*P */
478     anzi  = ai[i+1] - ai[i];
479     apnzj = 0;
480     for (j=0;j<anzi;j++) {
481       prow = *aj++;
482       pnzj = pi[prow+1] - pi[prow];
483       pjj  = pj + pi[prow];
484       paj  = pa + pi[prow];
485       for (k=0;k<pnzj;k++) {
486         if (!apjdense[pjj[k]]) {
487           apjdense[pjj[k]] = -1;
488           apj[apnzj++]     = pjj[k];
489         }
490         apa[pjj[k]] += (*aa)*paj[k];
491       }
492       flops += 2*pnzj;
493       aa++;
494     }
495 
496     /* Sort the j index array for quick sparse axpy. */
497     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
498 
499     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
500     pnzi = pi[i+1] - pi[i];
501     for (j=0;j<pnzi;j++) {
502       nextap = 0;
503       crow   = *pJ++;
504       cjj    = cj + ci[crow];
505       caj    = ca + ci[crow];
506       /* Perform sparse axpy operation.  Note cjj includes apj. */
507       for (k=0;nextap<apnzj;k++) {
508         if (cjj[k]==apj[nextap]) {
509           caj[k] += (*pA)*apa[apj[nextap++]];
510         }
511       }
512       flops += 2*apnzj;
513       pA++;
514     }
515 
516     /* Zero the current row info for A*P */
517     for (j=0;j<apnzj;j++) {
518       apa[apj[j]]      = 0.;
519       apjdense[apj[j]] = 0;
520     }
521   }
522 
523   /* Assemble the final matrix and clean up */
524   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
525   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   ierr = PetscFree(apa);CHKERRQ(ierr);
527   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
528   ierr = PetscLogEventEnd(MATSeqAIJ_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
529 
530   PetscFunctionReturn(0);
531 }
532 EXTERN_C_END
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "RegisterApplyPtAPRoutines_Private"
536 int RegisterApplyPtAPRoutines_Private(Mat A) {
537   int ierr;
538 
539   PetscFunctionBegin;
540 
541   if (!MATSeqAIJ_PtAP) {
542     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAP,"MatSeqAIJApplyPtAP",MAT_COOKIE);CHKERRQ(ierr);
543   }
544 
545   if (!MATSeqAIJ_PtAPSymbolic) {
546     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPSymbolic,"MatSeqAIJApplyPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
547   }
548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPSymbolic_seqaij_seqaij",
549                                            "MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ",
550                                            MatApplyPtAPSymbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
551 
552   if (!MATSeqAIJ_PtAPNumeric) {
553     ierr = PetscLogEventRegister(&MATSeqAIJ_PtAPNumeric,"MatSeqAIJApplyPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
554   }
555   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatApplyPtAPNumeric_seqaij_seqaij",
556                                            "MatApplyPtAPNumeric_SeqAIJ_SeqAIJ",
557                                            MatApplyPtAPNumeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
558   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561