xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 EXTERN int RegisterMatMatMultRoutines_Private(Mat);
11 
12 static int MAT_PtAPSymbolic = 0;
13 static int MAT_PtAPNumeric  = 0;
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatPtAP"
17 /*@
18    MatPtAP - Creates the matrix projection C = P^T * A * P
19 
20    Collective on Mat
21 
22    Input Parameters:
23 +  A - the matrix
24 -  P - the projection matrix
25 
26    Output Parameters:
27 .  C - the product matrix
28 
29    Notes:
30    C will be created and must be destroyed by the user with MatDestroy().
31 
32    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
33    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
34 
35    Level: intermediate
36 
37 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
38 @*/
39 int MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
40   int ierr;
41 
42   PetscFunctionBegin;
43   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
44   PetscValidType(A,1);
45   MatPreallocated(A);
46   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
47   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
48   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
49   PetscValidType(P,2);
50   MatPreallocated(P);
51   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
52   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
53   PetscValidPointer(C,3);
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
59   ierr = (*A->ops->matptap)(A,P,scall,fill,C);CHKERRQ(ierr);
60   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
66 int MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
67 {
68   int ierr;
69   PetscFunctionBegin;
70   if (scall == MAT_INITIAL_MATRIX){
71     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
72   }
73   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatPtAPSymbolic"
79 /*@
80    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
81 
82    Collective on Mat
83 
84    Input Parameters:
85 +  A - the matrix
86 -  P - the projection matrix
87 
88    Output Parameters:
89 .  C - the (i,j) structure of the product matrix
90 
91    Notes:
92    C will be created and must be destroyed by the user with MatDestroy().
93 
94    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
95    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
96    this (i,j) structure by calling MatPtAPNumeric().
97 
98    Level: intermediate
99 
100 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
101 @*/
102 int MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
103   int ierr;
104   char funct[80];
105   int (*f)(Mat,Mat,Mat*);
106 
107   PetscFunctionBegin;
108 
109   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
110   PetscValidType(A,1);
111   MatPreallocated(A);
112   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
113   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
114 
115   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
116   PetscValidType(P,2);
117   MatPreallocated(P);
118   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
119   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
120 
121   PetscValidPointer(C,3);
122 
123   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
124   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
125 
126   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
127   /* When other implementations exist, attack the multiple dispatch problem. */
128   ierr = PetscStrcpy(funct,"MatPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr);
129   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
130   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for P of type %s",P->type_name);
131   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
132   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for A of type %s",A->type_name);
133 
134   ierr = (*f)(A,P,C);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 EXTERN_C_BEGIN
140 #undef __FUNCT__
141 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
142 int MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
143   int            ierr;
144   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
145   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
146   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
147   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
148   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
149   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
150   MatScalar      *ca;
151 
152   PetscFunctionBegin;
153 
154   /* Start timer */
155   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
156 
157   /* Get ij structure of P^T */
158   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
159   ptJ=ptj;
160 
161   /* Allocate ci array, arrays for fill computation and */
162   /* free space for accumulating nonzero column info */
163   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
164   ci[0] = 0;
165 
166   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
167   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
168   ptasparserow = ptadenserow  + an;
169   denserow     = ptasparserow + an;
170   sparserow    = denserow     + pn;
171 
172   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
173   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
174   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
175   current_space = free_space;
176 
177   /* Determine symbolic info for each row of C: */
178   for (i=0;i<pn;i++) {
179     ptnzi  = pti[i+1] - pti[i];
180     ptanzi = 0;
181     /* Determine symbolic row of PtA: */
182     for (j=0;j<ptnzi;j++) {
183       arow = *ptJ++;
184       anzj = ai[arow+1] - ai[arow];
185       ajj  = aj + ai[arow];
186       for (k=0;k<anzj;k++) {
187         if (!ptadenserow[ajj[k]]) {
188           ptadenserow[ajj[k]]    = -1;
189           ptasparserow[ptanzi++] = ajj[k];
190         }
191       }
192     }
193       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
194     ptaj = ptasparserow;
195     cnzi   = 0;
196     for (j=0;j<ptanzi;j++) {
197       prow = *ptaj++;
198       pnzj = pi[prow+1] - pi[prow];
199       pjj  = pj + pi[prow];
200       for (k=0;k<pnzj;k++) {
201         if (!denserow[pjj[k]]) {
202             denserow[pjj[k]]  = -1;
203             sparserow[cnzi++] = pjj[k];
204         }
205       }
206     }
207 
208     /* sort sparserow */
209     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
210 
211     /* If free space is not available, make more free space */
212     /* Double the amount of total space in the list */
213     if (current_space->local_remaining<cnzi) {
214       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
215     }
216 
217     /* Copy data into free space, and zero out denserows */
218     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
219     current_space->array           += cnzi;
220     current_space->local_used      += cnzi;
221     current_space->local_remaining -= cnzi;
222 
223     for (j=0;j<ptanzi;j++) {
224       ptadenserow[ptasparserow[j]] = 0;
225     }
226     for (j=0;j<cnzi;j++) {
227       denserow[sparserow[j]] = 0;
228     }
229       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
230       /*        For now, we will recompute what is needed. */
231     ci[i+1] = ci[i] + cnzi;
232   }
233   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
234   /* Allocate space for cj, initialize cj, and */
235   /* destroy list of free space and other temporary array(s) */
236   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
237   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
238   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
239 
240   /* Allocate space for ca */
241   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
242   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
243 
244   /* put together the new matrix */
245   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
246 
247   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
248   /* Since these are PETSc arrays, change flags to free them as necessary. */
249   c = (Mat_SeqAIJ *)((*C)->data);
250   c->freedata = PETSC_TRUE;
251   c->nonew    = 0;
252 
253   /* Clean up. */
254   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
255 
256   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
257   PetscFunctionReturn(0);
258 }
259 EXTERN_C_END
260 
261 #include "src/mat/impls/maij/maij.h"
262 EXTERN_C_BEGIN
263 #undef __FUNCT__
264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
265 int MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
266   /* This routine requires testing -- I don't think it works. */
267   int            ierr;
268   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
269   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
270   Mat            P=pp->AIJ;
271   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
272   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
273   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
274   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
275   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
276   MatScalar      *ca;
277 
278   PetscFunctionBegin;
279   /* Start timer */
280   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
281 
282   /* Get ij structure of P^T */
283   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
284 
285   /* Allocate ci array, arrays for fill computation and */
286   /* free space for accumulating nonzero column info */
287   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
288   ci[0] = 0;
289 
290   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
291   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
292   ptasparserow = ptadenserow  + an;
293   denserow     = ptasparserow + an;
294   sparserow    = denserow     + pn;
295 
296   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
297   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
298   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
299   current_space = free_space;
300 
301   /* Determine symbolic info for each row of C: */
302   for (i=0;i<pn/ppdof;i++) {
303     ptnzi  = pti[i+1] - pti[i];
304     ptanzi = 0;
305     ptJ    = ptj + pti[i];
306     for (dof=0;dof<ppdof;dof++) {
307     /* Determine symbolic row of PtA: */
308       for (j=0;j<ptnzi;j++) {
309         arow = ptJ[j] + dof;
310         anzj = ai[arow+1] - ai[arow];
311         ajj  = aj + ai[arow];
312         for (k=0;k<anzj;k++) {
313           if (!ptadenserow[ajj[k]]) {
314             ptadenserow[ajj[k]]    = -1;
315             ptasparserow[ptanzi++] = ajj[k];
316           }
317         }
318       }
319       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
320       ptaj = ptasparserow;
321       cnzi   = 0;
322       for (j=0;j<ptanzi;j++) {
323         pdof = *ptaj%dof;
324         prow = (*ptaj++)/dof;
325         pnzj = pi[prow+1] - pi[prow];
326         pjj  = pj + pi[prow];
327         for (k=0;k<pnzj;k++) {
328           if (!denserow[pjj[k]+pdof]) {
329             denserow[pjj[k]+pdof] = -1;
330             sparserow[cnzi++]     = pjj[k]+pdof;
331           }
332         }
333       }
334 
335       /* sort sparserow */
336       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
337 
338       /* If free space is not available, make more free space */
339       /* Double the amount of total space in the list */
340       if (current_space->local_remaining<cnzi) {
341         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
342       }
343 
344       /* Copy data into free space, and zero out denserows */
345       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
346       current_space->array           += cnzi;
347       current_space->local_used      += cnzi;
348       current_space->local_remaining -= cnzi;
349 
350       for (j=0;j<ptanzi;j++) {
351         ptadenserow[ptasparserow[j]] = 0;
352       }
353       for (j=0;j<cnzi;j++) {
354         denserow[sparserow[j]] = 0;
355       }
356       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
357       /*        For now, we will recompute what is needed. */
358       ci[i+1+dof] = ci[i+dof] + cnzi;
359     }
360   }
361   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
362   /* Allocate space for cj, initialize cj, and */
363   /* destroy list of free space and other temporary array(s) */
364   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
365   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
366   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
367 
368   /* Allocate space for ca */
369   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
370   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
371 
372   /* put together the new matrix */
373   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
374 
375   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
376   /* Since these are PETSc arrays, change flags to free them as necessary. */
377   c = (Mat_SeqAIJ *)((*C)->data);
378   c->freedata = PETSC_TRUE;
379   c->nonew    = 0;
380 
381   /* Clean up. */
382   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
383 
384   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 EXTERN_C_END
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatPtAPNumeric"
391 /*@
392    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
393 
394    Collective on Mat
395 
396    Input Parameters:
397 +  A - the matrix
398 -  P - the projection matrix
399 
400    Output Parameters:
401 .  C - the product matrix
402 
403    Notes:
404    C must have been created by calling MatPtAPSymbolic and must be destroyed by
405    the user using MatDeatroy().
406 
407    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
408    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
409 
410    Level: intermediate
411 
412 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
413 @*/
414 int MatPtAPNumeric(Mat A,Mat P,Mat C) {
415   int ierr;
416   char funct[80];
417   int (*f)(Mat,Mat,Mat);
418 
419   PetscFunctionBegin;
420 
421   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
422   PetscValidType(A,1);
423   MatPreallocated(A);
424   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
425   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
426 
427   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
428   PetscValidType(P,2);
429   MatPreallocated(P);
430   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
431   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
432 
433   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
434   PetscValidType(C,3);
435   MatPreallocated(C);
436   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
437   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
438 
439   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
440   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
441   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
442   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
443 
444   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
445   /* When other implementations exist, attack the multiple dispatch problem. */
446   ierr = PetscStrcpy(funct,"MatPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
447   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
448   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for P of type %s",P->type_name);
449   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
450   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for A of type %s",A->type_name);
451 
452   ierr = (*f)(A,P,C);CHKERRQ(ierr);
453 
454   PetscFunctionReturn(0);
455 }
456 
457 EXTERN_C_BEGIN
458 #undef __FUNCT__
459 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
460 int MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C) {
461   int        ierr,flops=0;
462   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
463   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
464   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
465   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
466   int        *ci=c->i,*cj=c->j,*cjj;
467   int        am=A->M,cn=C->N,cm=C->M;
468   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
469   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
470 
471   PetscFunctionBegin;
472   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
473 
474   /* Allocate temporary array for storage of one row of A*P */
475   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
476   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
477 
478   apj      = (int*)(apa + cn);
479   apjdense = apj + cn;
480 
481   /* Clear old values in C */
482   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
483 
484   for (i=0;i<am;i++) {
485     /* Form sparse row of A*P */
486     anzi  = ai[i+1] - ai[i];
487     apnzj = 0;
488     for (j=0;j<anzi;j++) {
489       prow = *aj++;
490       pnzj = pi[prow+1] - pi[prow];
491       pjj  = pj + pi[prow];
492       paj  = pa + pi[prow];
493       for (k=0;k<pnzj;k++) {
494         if (!apjdense[pjj[k]]) {
495           apjdense[pjj[k]] = -1;
496           apj[apnzj++]     = pjj[k];
497         }
498         apa[pjj[k]] += (*aa)*paj[k];
499       }
500       flops += 2*pnzj;
501       aa++;
502     }
503 
504     /* Sort the j index array for quick sparse axpy. */
505     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
506 
507     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
508     pnzi = pi[i+1] - pi[i];
509     for (j=0;j<pnzi;j++) {
510       nextap = 0;
511       crow   = *pJ++;
512       cjj    = cj + ci[crow];
513       caj    = ca + ci[crow];
514       /* Perform sparse axpy operation.  Note cjj includes apj. */
515       for (k=0;nextap<apnzj;k++) {
516         if (cjj[k]==apj[nextap]) {
517           caj[k] += (*pA)*apa[apj[nextap++]];
518         }
519       }
520       flops += 2*apnzj;
521       pA++;
522     }
523 
524     /* Zero the current row info for A*P */
525     for (j=0;j<apnzj;j++) {
526       apa[apj[j]]      = 0.;
527       apjdense[apj[j]] = 0;
528     }
529   }
530 
531   /* Assemble the final matrix and clean up */
532   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534   ierr = PetscFree(apa);CHKERRQ(ierr);
535   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
536   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 EXTERN_C_END
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "RegisterPtAPRoutines_Private"
544 int RegisterPtAPRoutines_Private(Mat A) {
545   int ierr;
546 
547   PetscFunctionBegin;
548   if (!MAT_PtAPSymbolic) {
549     ierr = PetscLogEventRegister(&MAT_PtAPSymbolic,"MatPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
550   }
551   if (!MAT_PtAPNumeric) {
552     ierr = PetscLogEventRegister(&MAT_PtAPNumeric,"MatPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
553   }
554   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
555 
556   PetscFunctionReturn(0);
557 }
558