xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision dfbe8321245cdf9338262824a41580dace4e4143)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 EXTERN PetscErrorCode RegisterMatMatMultRoutines_Private(Mat);
11 
12 static int MAT_PtAPSymbolic = 0;
13 static int MAT_PtAPNumeric  = 0;
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatPtAP"
17 /*@
18    MatPtAP - Creates the matrix projection C = P^T * A * P
19 
20    Collective on Mat
21 
22    Input Parameters:
23 +  A - the matrix
24 -  P - the projection matrix
25 
26    Output Parameters:
27 .  C - the product matrix
28 
29    Notes:
30    C will be created and must be destroyed by the user with MatDestroy().
31 
32    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
33    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
34 
35    Level: intermediate
36 
37 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
38 @*/
39 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
40   PetscErrorCode ierr;
41 
42   PetscFunctionBegin;
43   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
44   PetscValidType(A,1);
45   MatPreallocated(A);
46   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
47   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
48   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
49   PetscValidType(P,2);
50   MatPreallocated(P);
51   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
52   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
53   PetscValidPointer(C,3);
54   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
55 
56   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
57 
58   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
59   ierr = (*A->ops->matptap)(A,P,scall,fill,C);CHKERRQ(ierr);
60   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
66 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
67 {
68   PetscErrorCode ierr;
69   PetscFunctionBegin;
70   if (scall == MAT_INITIAL_MATRIX){
71     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
72   }
73   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatPtAPSymbolic"
79 /*@
80    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
81 
82    Collective on Mat
83 
84    Input Parameters:
85 +  A - the matrix
86 -  P - the projection matrix
87 
88    Output Parameters:
89 .  C - the (i,j) structure of the product matrix
90 
91    Notes:
92    C will be created and must be destroyed by the user with MatDestroy().
93 
94    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
95    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
96    this (i,j) structure by calling MatPtAPNumeric().
97 
98    Level: intermediate
99 
100 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
101 @*/
102 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
103   PetscErrorCode ierr;
104   char funct[80];
105   int (*f)(Mat,Mat,Mat*);
106 
107   PetscFunctionBegin;
108 
109   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
110   PetscValidType(A,1);
111   MatPreallocated(A);
112   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
113   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
114 
115   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
116   PetscValidType(P,2);
117   MatPreallocated(P);
118   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
119   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
120 
121   PetscValidPointer(C,3);
122 
123   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
124   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
125 
126   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
127   /* When other implementations exist, attack the multiple dispatch problem. */
128   ierr = PetscStrcpy(funct,"MatPtAPSymbolic_seqaij_seqaij");CHKERRQ(ierr);
129   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
130   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for P of type %s",P->type_name);
131   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
132   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic is not supported for A of type %s",A->type_name);
133 
134   ierr = (*f)(A,P,C);CHKERRQ(ierr);
135 
136   PetscFunctionReturn(0);
137 }
138 
139 EXTERN_C_BEGIN
140 #undef __FUNCT__
141 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
142 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
143   PetscErrorCode ierr;
144   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
145   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
146   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
147   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
148   int            an=A->N,am=A->M,pn=P->N,pm=P->M;
149   int            i,j,k,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
150   MatScalar      *ca;
151 
152   PetscFunctionBegin;
153 
154   /* Start timer */
155   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
156 
157   /* Get ij structure of P^T */
158   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
159   ptJ=ptj;
160 
161   /* Allocate ci array, arrays for fill computation and */
162   /* free space for accumulating nonzero column info */
163   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
164   ci[0] = 0;
165 
166   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
167   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
168   ptasparserow = ptadenserow  + an;
169   denserow     = ptasparserow + an;
170   sparserow    = denserow     + pn;
171 
172   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
173   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
174   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
175   current_space = free_space;
176 
177   /* Determine symbolic info for each row of C: */
178   for (i=0;i<pn;i++) {
179     ptnzi  = pti[i+1] - pti[i];
180     ptanzi = 0;
181     /* Determine symbolic row of PtA: */
182     for (j=0;j<ptnzi;j++) {
183       arow = *ptJ++;
184       anzj = ai[arow+1] - ai[arow];
185       ajj  = aj + ai[arow];
186       for (k=0;k<anzj;k++) {
187         if (!ptadenserow[ajj[k]]) {
188           ptadenserow[ajj[k]]    = -1;
189           ptasparserow[ptanzi++] = ajj[k];
190         }
191       }
192     }
193       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
194     ptaj = ptasparserow;
195     cnzi   = 0;
196     for (j=0;j<ptanzi;j++) {
197       prow = *ptaj++;
198       pnzj = pi[prow+1] - pi[prow];
199       pjj  = pj + pi[prow];
200       for (k=0;k<pnzj;k++) {
201         if (!denserow[pjj[k]]) {
202             denserow[pjj[k]]  = -1;
203             sparserow[cnzi++] = pjj[k];
204         }
205       }
206     }
207 
208     /* sort sparserow */
209     ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
210 
211     /* If free space is not available, make more free space */
212     /* Double the amount of total space in the list */
213     if (current_space->local_remaining<cnzi) {
214       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
215     }
216 
217     /* Copy data into free space, and zero out denserows */
218     ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
219     current_space->array           += cnzi;
220     current_space->local_used      += cnzi;
221     current_space->local_remaining -= cnzi;
222 
223     for (j=0;j<ptanzi;j++) {
224       ptadenserow[ptasparserow[j]] = 0;
225     }
226     for (j=0;j<cnzi;j++) {
227       denserow[sparserow[j]] = 0;
228     }
229       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
230       /*        For now, we will recompute what is needed. */
231     ci[i+1] = ci[i] + cnzi;
232   }
233   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
234   /* Allocate space for cj, initialize cj, and */
235   /* destroy list of free space and other temporary array(s) */
236   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
237   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
238   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
239 
240   /* Allocate space for ca */
241   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
242   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
243 
244   /* put together the new matrix */
245   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
246 
247   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
248   /* Since these are PETSc arrays, change flags to free them as necessary. */
249   c = (Mat_SeqAIJ *)((*C)->data);
250   c->freedata = PETSC_TRUE;
251   c->nonew    = 0;
252 
253   /* Clean up. */
254   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
255 
256   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
257   PetscFunctionReturn(0);
258 }
259 EXTERN_C_END
260 
261 #include "src/mat/impls/maij/maij.h"
262 EXTERN_C_BEGIN
263 #undef __FUNCT__
264 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
265 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
266   /* This routine requires testing -- I don't think it works. */
267   PetscErrorCode ierr;
268   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
269   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
270   Mat            P=pp->AIJ;
271   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
272   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
273   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
274   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
275   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
276   MatScalar      *ca;
277 
278   PetscFunctionBegin;
279   /* Start timer */
280   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
281 
282   /* Get ij structure of P^T */
283   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
284 
285   /* Allocate ci array, arrays for fill computation and */
286   /* free space for accumulating nonzero column info */
287   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
288   ci[0] = 0;
289 
290   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
291   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
292   ptasparserow = ptadenserow  + an;
293   denserow     = ptasparserow + an;
294   sparserow    = denserow     + pn;
295 
296   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
297   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
298   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
299   current_space = free_space;
300 
301   /* Determine symbolic info for each row of C: */
302   for (i=0;i<pn/ppdof;i++) {
303     ptnzi  = pti[i+1] - pti[i];
304     ptanzi = 0;
305     ptJ    = ptj + pti[i];
306     for (dof=0;dof<ppdof;dof++) {
307     /* Determine symbolic row of PtA: */
308       for (j=0;j<ptnzi;j++) {
309         arow = ptJ[j] + dof;
310         anzj = ai[arow+1] - ai[arow];
311         ajj  = aj + ai[arow];
312         for (k=0;k<anzj;k++) {
313           if (!ptadenserow[ajj[k]]) {
314             ptadenserow[ajj[k]]    = -1;
315             ptasparserow[ptanzi++] = ajj[k];
316           }
317         }
318       }
319       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
320       ptaj = ptasparserow;
321       cnzi   = 0;
322       for (j=0;j<ptanzi;j++) {
323         pdof = *ptaj%dof;
324         prow = (*ptaj++)/dof;
325         pnzj = pi[prow+1] - pi[prow];
326         pjj  = pj + pi[prow];
327         for (k=0;k<pnzj;k++) {
328           if (!denserow[pjj[k]+pdof]) {
329             denserow[pjj[k]+pdof] = -1;
330             sparserow[cnzi++]     = pjj[k]+pdof;
331           }
332         }
333       }
334 
335       /* sort sparserow */
336       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
337 
338       /* If free space is not available, make more free space */
339       /* Double the amount of total space in the list */
340       if (current_space->local_remaining<cnzi) {
341         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
342       }
343 
344       /* Copy data into free space, and zero out denserows */
345       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
346       current_space->array           += cnzi;
347       current_space->local_used      += cnzi;
348       current_space->local_remaining -= cnzi;
349 
350       for (j=0;j<ptanzi;j++) {
351         ptadenserow[ptasparserow[j]] = 0;
352       }
353       for (j=0;j<cnzi;j++) {
354         denserow[sparserow[j]] = 0;
355       }
356       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
357       /*        For now, we will recompute what is needed. */
358       ci[i+1+dof] = ci[i+dof] + cnzi;
359     }
360   }
361   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
362   /* Allocate space for cj, initialize cj, and */
363   /* destroy list of free space and other temporary array(s) */
364   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
365   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
366   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
367 
368   /* Allocate space for ca */
369   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
370   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
371 
372   /* put together the new matrix */
373   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
374 
375   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
376   /* Since these are PETSc arrays, change flags to free them as necessary. */
377   c = (Mat_SeqAIJ *)((*C)->data);
378   c->freedata = PETSC_TRUE;
379   c->nonew    = 0;
380 
381   /* Clean up. */
382   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
383 
384   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 EXTERN_C_END
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatPtAPNumeric"
391 /*@
392    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
393 
394    Collective on Mat
395 
396    Input Parameters:
397 +  A - the matrix
398 -  P - the projection matrix
399 
400    Output Parameters:
401 .  C - the product matrix
402 
403    Notes:
404    C must have been created by calling MatPtAPSymbolic and must be destroyed by
405    the user using MatDeatroy().
406 
407    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
408    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
409 
410    Level: intermediate
411 
412 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
413 @*/
414 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
415   PetscErrorCode ierr;
416   char funct[80];
417   int (*f)(Mat,Mat,Mat);
418 
419   PetscFunctionBegin;
420 
421   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
422   PetscValidType(A,1);
423   MatPreallocated(A);
424   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
425   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
426 
427   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
428   PetscValidType(P,2);
429   MatPreallocated(P);
430   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
431   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
432 
433   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
434   PetscValidType(C,3);
435   MatPreallocated(C);
436   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
437   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
438 
439   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
440   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
441   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
442   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
443 
444   /* Currently only _seqaij_seqaij is implemented, so just query for it. */
445   /* When other implementations exist, attack the multiple dispatch problem. */
446   ierr = PetscStrcpy(funct,"MatPtAPNumeric_seqaij_seqaij");CHKERRQ(ierr);
447   ierr = PetscObjectQueryFunction((PetscObject)P,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
448   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for P of type %s",P->type_name);
449   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&f);CHKERRQ(ierr);
450   if (!f) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric is not supported for A of type %s",A->type_name);
451 
452   ierr = (*f)(A,P,C);CHKERRQ(ierr);
453 
454   PetscFunctionReturn(0);
455 }
456 
457 EXTERN_C_BEGIN
458 #undef __FUNCT__
459 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
460 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
461 {
462   PetscErrorCode ierr;
463   int        flops=0;
464   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
465   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
466   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
467   int        *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
468   int        *ci=c->i,*cj=c->j,*cjj;
469   int        am=A->M,cn=C->N,cm=C->M;
470   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
471   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
472 
473   PetscFunctionBegin;
474   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
475 
476   /* Allocate temporary array for storage of one row of A*P */
477   ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(int)),&apa);CHKERRQ(ierr);
478   ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(int)));CHKERRQ(ierr);
479 
480   apj      = (int*)(apa + cn);
481   apjdense = apj + cn;
482 
483   /* Clear old values in C */
484   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
485 
486   for (i=0;i<am;i++) {
487     /* Form sparse row of A*P */
488     anzi  = ai[i+1] - ai[i];
489     apnzj = 0;
490     for (j=0;j<anzi;j++) {
491       prow = *aj++;
492       pnzj = pi[prow+1] - pi[prow];
493       pjj  = pj + pi[prow];
494       paj  = pa + pi[prow];
495       for (k=0;k<pnzj;k++) {
496         if (!apjdense[pjj[k]]) {
497           apjdense[pjj[k]] = -1;
498           apj[apnzj++]     = pjj[k];
499         }
500         apa[pjj[k]] += (*aa)*paj[k];
501       }
502       flops += 2*pnzj;
503       aa++;
504     }
505 
506     /* Sort the j index array for quick sparse axpy. */
507     ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr);
508 
509     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
510     pnzi = pi[i+1] - pi[i];
511     for (j=0;j<pnzi;j++) {
512       nextap = 0;
513       crow   = *pJ++;
514       cjj    = cj + ci[crow];
515       caj    = ca + ci[crow];
516       /* Perform sparse axpy operation.  Note cjj includes apj. */
517       for (k=0;nextap<apnzj;k++) {
518         if (cjj[k]==apj[nextap]) {
519           caj[k] += (*pA)*apa[apj[nextap++]];
520         }
521       }
522       flops += 2*apnzj;
523       pA++;
524     }
525 
526     /* Zero the current row info for A*P */
527     for (j=0;j<apnzj;j++) {
528       apa[apj[j]]      = 0.;
529       apjdense[apj[j]] = 0;
530     }
531   }
532 
533   /* Assemble the final matrix and clean up */
534   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536   ierr = PetscFree(apa);CHKERRQ(ierr);
537   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
538   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,C,0);CHKERRQ(ierr);
539 
540   PetscFunctionReturn(0);
541 }
542 EXTERN_C_END
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "RegisterPtAPRoutines_Private"
546 PetscErrorCode RegisterPtAPRoutines_Private(Mat A)
547 {
548   PetscErrorCode ierr;
549 
550   PetscFunctionBegin;
551   if (!MAT_PtAPSymbolic) {
552     ierr = PetscLogEventRegister(&MAT_PtAPSymbolic,"MatPtAPSymbolic",MAT_COOKIE);CHKERRQ(ierr);
553   }
554   if (!MAT_PtAPNumeric) {
555     ierr = PetscLogEventRegister(&MAT_PtAPNumeric,"MatPtAPNumeric",MAT_COOKIE);CHKERRQ(ierr);
556   }
557   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
558 
559   PetscFunctionReturn(0);
560 }
561