xref: /petsc/src/mat/impls/aij/seq/matptap.c (revision 534c1384a790e876391cbb8541980f0b12ff9287)
1 /*
2   Defines projective product routines where A is a AIJ matrix
3           C = P^T * A * P
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"   /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 EXTERN PetscErrorCode RegisterMatMatMultRoutines_Private(Mat);
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatPtAP"
14 /*@
15    MatPtAP - Creates the matrix projection C = P^T * A * P
16 
17    Collective on Mat
18 
19    Input Parameters:
20 +  A - the matrix
21 .  P - the projection matrix
22 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
23 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(P))
24 
25    Output Parameters:
26 .  C - the product matrix
27 
28    Notes:
29    C will be created and must be destroyed by the user with MatDestroy().
30 
31    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
32    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
33 
34    Level: intermediate
35 
36 .seealso: MatPtAPSymbolic(),MatPtAPNumeric(),MatMatMult()
37 @*/
38 PetscErrorCode MatPtAP(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C) {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat *);
41   PetscErrorCode (*fP)(Mat,Mat,MatReuse,PetscReal,Mat *);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
50   PetscValidType(P,2);
51   MatPreallocated(P);
52   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55 
56   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
57 
58   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
59 
60   /* For now, we do not dispatch based on the type of A and P */
61   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
62   fA = A->ops->ptap;
63   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for A of type %s",A->type_name);
64   fP = P->ops->ptap;
65   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAP not supported for P of type %s",P->type_name);
66   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAP requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
67 
68   ierr = PetscLogEventBegin(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
69   ierr = (*fA)(A,P,scall,fill,C);CHKERRQ(ierr);
70   ierr = PetscLogEventEnd(MAT_PtAP,A,P,0,0);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatPtAP_SeqAIJ_SeqAIJ"
76 PetscErrorCode MatPtAP_SeqAIJ_SeqAIJ(Mat A,Mat P,MatReuse scall,PetscReal fill,Mat *C)
77 {
78   PetscErrorCode ierr;
79   PetscFunctionBegin;
80   if (scall == MAT_INITIAL_MATRIX){
81     ierr = MatPtAPSymbolic_SeqAIJ_SeqAIJ(A,P,fill,C);CHKERRQ(ierr);
82   }
83   ierr = MatPtAPNumeric_SeqAIJ_SeqAIJ(A,P,*C);CHKERRQ(ierr);
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNCT__
88 #define __FUNCT__ "MatPtAPSymbolic"
89 /*
90    MatPtAPSymbolic - Creates the (i,j) structure of the matrix projection C = P^T * A * P
91 
92    Collective on Mat
93 
94    Input Parameters:
95 +  A - the matrix
96 -  P - the projection matrix
97 
98    Output Parameters:
99 .  C - the (i,j) structure of the product matrix
100 
101    Notes:
102    C will be created and must be destroyed by the user with MatDestroy().
103 
104    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
105    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.  The product is computed using
106    this (i,j) structure by calling MatPtAPNumeric().
107 
108    Level: intermediate
109 
110 .seealso: MatPtAP(),MatPtAPNumeric(),MatMatMultSymbolic()
111 */
112 PetscErrorCode MatPtAPSymbolic(Mat A,Mat P,PetscReal fill,Mat *C) {
113   PetscErrorCode ierr;
114   PetscErrorCode (*fA)(Mat,Mat,PetscReal,Mat*);
115   PetscErrorCode (*fP)(Mat,Mat,PetscReal,Mat*);
116 
117   PetscFunctionBegin;
118 
119   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
120   PetscValidType(A,1);
121   MatPreallocated(A);
122   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
123   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
124 
125   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
126   PetscValidType(P,2);
127   MatPreallocated(P);
128   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
129   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
130 
131   PetscValidPointer(C,3);
132 
133   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
134   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
135 
136   /* For now, we do not dispatch based on the type of A and P */
137   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
138   fA = A->ops->ptapsymbolic;
139   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for A of type %s",A->type_name);
140   fP = P->ops->ptapsymbolic;
141   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPSymbolic not supported for P of type %s",P->type_name);
142   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPSymbolic requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
143 
144   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
145   ierr = (*fA)(A,P,fill,C);CHKERRQ(ierr);
146   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,P,0,0);CHKERRQ(ierr);
147 
148   PetscFunctionReturn(0);
149 }
150 
151 typedef struct {
152   Mat    symAP;
153 } Mat_PtAPstruct;
154 
155 EXTERN PetscErrorCode MatDestroy_SeqAIJ(Mat);
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatDestroy_SeqAIJ_PtAP"
159 PetscErrorCode MatDestroy_SeqAIJ_PtAP(Mat A)
160 {
161   PetscErrorCode    ierr;
162   Mat_PtAPstruct    *ptap=(Mat_PtAPstruct*)A->spptr;
163 
164   PetscFunctionBegin;
165   ierr = MatDestroy(ptap->symAP);CHKERRQ(ierr);
166   ierr = PetscFree(ptap);CHKERRQ(ierr);
167   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqAIJ"
173 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,PetscReal fill,Mat *C) {
174   PetscErrorCode ierr;
175   int            *pti,*ptj;
176   Mat            Pt,AP;
177   Mat_PtAPstruct *ptap;
178 
179   PetscFunctionBegin;
180   /* create symbolic Pt */
181   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
182   ierr = MatCreateSeqAIJWithArrays(P->comm,P->N,P->M,pti,ptj,PETSC_NULL,&Pt);CHKERRQ(ierr);
183 
184   /* get symbolic AP=A*P and C=Pt*AP */
185   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,P,fill,&AP);CHKERRQ(ierr);
186   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(Pt,AP,fill,C);CHKERRQ(ierr);
187 
188   /* clean up */
189   ierr = MatRestoreSymbolicTranspose_SeqAIJ(Pt,&pti,&ptj);CHKERRQ(ierr);
190   ierr = MatDestroy(Pt);CHKERRQ(ierr);
191 
192   /* save symbolic AP - to be used by MatPtAPNumeric_SeqAIJ_SeqAIJ() */
193   ierr = PetscNew(Mat_PtAPstruct,&ptap);CHKERRQ(ierr);
194   ptap->symAP = AP;
195   (*C)->spptr = (void*)ptap;
196   (*C)->ops->destroy  = MatDestroy_SeqAIJ_PtAP;
197 
198   PetscFunctionReturn(0);
199 }
200 
201 #include "src/mat/impls/maij/maij.h"
202 EXTERN_C_BEGIN
203 #undef __FUNCT__
204 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ"
205 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat *C) {
206   /* This routine requires testing -- I don't think it works. */
207   PetscErrorCode ierr;
208   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
209   Mat_SeqMAIJ    *pp=(Mat_SeqMAIJ*)PP->data;
210   Mat            P=pp->AIJ;
211   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
212   int            *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
213   int            *ci,*cj,*denserow,*sparserow,*ptadenserow,*ptasparserow,*ptaj;
214   int            an=A->N,am=A->M,pn=P->N,pm=P->M,ppdof=pp->dof;
215   int            i,j,k,dof,pdof,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
216   MatScalar      *ca;
217 
218   PetscFunctionBegin;
219   /* Start timer */
220   ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
221 
222   /* Get ij structure of P^T */
223   ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
224 
225   /* Allocate ci array, arrays for fill computation and */
226   /* free space for accumulating nonzero column info */
227   ierr = PetscMalloc((pn+1)*sizeof(int),&ci);CHKERRQ(ierr);
228   ci[0] = 0;
229 
230   ierr = PetscMalloc((2*pn+2*an+1)*sizeof(int),&ptadenserow);CHKERRQ(ierr);
231   ierr = PetscMemzero(ptadenserow,(2*pn+2*an+1)*sizeof(int));CHKERRQ(ierr);
232   ptasparserow = ptadenserow  + an;
233   denserow     = ptasparserow + an;
234   sparserow    = denserow     + pn;
235 
236   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
237   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
238   ierr          = GetMoreSpace((ai[am]/pm)*pn,&free_space);
239   current_space = free_space;
240 
241   /* Determine symbolic info for each row of C: */
242   for (i=0;i<pn/ppdof;i++) {
243     ptnzi  = pti[i+1] - pti[i];
244     ptanzi = 0;
245     ptJ    = ptj + pti[i];
246     for (dof=0;dof<ppdof;dof++) {
247     /* Determine symbolic row of PtA: */
248       for (j=0;j<ptnzi;j++) {
249         arow = ptJ[j] + dof;
250         anzj = ai[arow+1] - ai[arow];
251         ajj  = aj + ai[arow];
252         for (k=0;k<anzj;k++) {
253           if (!ptadenserow[ajj[k]]) {
254             ptadenserow[ajj[k]]    = -1;
255             ptasparserow[ptanzi++] = ajj[k];
256           }
257         }
258       }
259       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
260       ptaj = ptasparserow;
261       cnzi   = 0;
262       for (j=0;j<ptanzi;j++) {
263         pdof = *ptaj%dof;
264         prow = (*ptaj++)/dof;
265         pnzj = pi[prow+1] - pi[prow];
266         pjj  = pj + pi[prow];
267         for (k=0;k<pnzj;k++) {
268           if (!denserow[pjj[k]+pdof]) {
269             denserow[pjj[k]+pdof] = -1;
270             sparserow[cnzi++]     = pjj[k]+pdof;
271           }
272         }
273       }
274 
275       /* sort sparserow */
276       ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr);
277 
278       /* If free space is not available, make more free space */
279       /* Double the amount of total space in the list */
280       if (current_space->local_remaining<cnzi) {
281         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
282       }
283 
284       /* Copy data into free space, and zero out denserows */
285       ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(int));CHKERRQ(ierr);
286       current_space->array           += cnzi;
287       current_space->local_used      += cnzi;
288       current_space->local_remaining -= cnzi;
289 
290       for (j=0;j<ptanzi;j++) {
291         ptadenserow[ptasparserow[j]] = 0;
292       }
293       for (j=0;j<cnzi;j++) {
294         denserow[sparserow[j]] = 0;
295       }
296       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
297       /*        For now, we will recompute what is needed. */
298       ci[i+1+dof] = ci[i+dof] + cnzi;
299     }
300   }
301   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
302   /* Allocate space for cj, initialize cj, and */
303   /* destroy list of free space and other temporary array(s) */
304   ierr = PetscMalloc((ci[pn]+1)*sizeof(int),&cj);CHKERRQ(ierr);
305   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
306   ierr = PetscFree(ptadenserow);CHKERRQ(ierr);
307 
308   /* Allocate space for ca */
309   ierr = PetscMalloc((ci[pn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
310   ierr = PetscMemzero(ca,(ci[pn]+1)*sizeof(MatScalar));CHKERRQ(ierr);
311 
312   /* put together the new matrix */
313   ierr = MatCreateSeqAIJWithArrays(A->comm,pn,pn,ci,cj,ca,C);CHKERRQ(ierr);
314 
315   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
316   /* Since these are PETSc arrays, change flags to free them as necessary. */
317   c = (Mat_SeqAIJ *)((*C)->data);
318   c->freedata = PETSC_TRUE;
319   c->nonew    = 0;
320 
321   /* Clean up. */
322   ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr);
323 
324   ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 EXTERN_C_END
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatPtAPNumeric"
331 /*
332    MatPtAPNumeric - Computes the matrix projection C = P^T * A * P
333 
334    Collective on Mat
335 
336    Input Parameters:
337 +  A - the matrix
338 -  P - the projection matrix
339 
340    Output Parameters:
341 .  C - the product matrix
342 
343    Notes:
344    C must have been created by calling MatPtAPSymbolic and must be destroyed by
345    the user using MatDeatroy().
346 
347    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
348    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
349 
350    Level: intermediate
351 
352 .seealso: MatPtAP(),MatPtAPSymbolic(),MatMatMultNumeric()
353 */
354 PetscErrorCode MatPtAPNumeric(Mat A,Mat P,Mat C) {
355   PetscErrorCode ierr;
356   PetscErrorCode (*fA)(Mat,Mat,Mat);
357   PetscErrorCode (*fP)(Mat,Mat,Mat);
358 
359   PetscFunctionBegin;
360 
361   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
362   PetscValidType(A,1);
363   MatPreallocated(A);
364   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
365   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
366 
367   PetscValidHeaderSpecific(P,MAT_COOKIE,2);
368   PetscValidType(P,2);
369   MatPreallocated(P);
370   if (!P->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
371   if (P->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
372 
373   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
374   PetscValidType(C,3);
375   MatPreallocated(C);
376   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
377   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
378 
379   if (P->N!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->M);
380   if (P->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->M,A->N);
381   if (A->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %d != %d",A->M,A->N);
382   if (P->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",P->N,C->N);
383 
384   /* For now, we do not dispatch based on the type of A and P */
385   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
386   fA = A->ops->ptapnumeric;
387   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for A of type %s",A->type_name);
388   fP = P->ops->ptapnumeric;
389   if (!fP) SETERRQ1(PETSC_ERR_SUP,"MatPtAPNumeric not supported for P of type %s",P->type_name);
390   if (fP!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatPtAPNumeric requires A, %s, to be compatible with P, %s",A->type_name,P->type_name);
391 
392   ierr = PetscLogEventBegin(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
393   ierr = (*fA)(A,P,C);CHKERRQ(ierr);
394   ierr = PetscLogEventEnd(MAT_PtAPNumeric,A,P,0,0);CHKERRQ(ierr);
395 
396   PetscFunctionReturn(0);
397 }
398 
399 #undef __FUNCT__
400 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqAIJ"
401 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
402 {
403   PetscErrorCode ierr;
404   int        flops=0;
405   Mat_SeqAIJ *a  = (Mat_SeqAIJ *) A->data;
406   Mat_SeqAIJ *p  = (Mat_SeqAIJ *) P->data;
407   Mat_SeqAIJ *c  = (Mat_SeqAIJ *) C->data;
408   int        *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
409   int        *ci=c->i,*cj=c->j,*cjj;
410   int        am=A->M,cn=C->N,cm=C->M;
411   int        i,j,k,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
412   MatScalar  *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
413   Mat_PtAPstruct *ptap=(Mat_PtAPstruct*)C->spptr;
414   Mat_SeqAIJ     *ap = (Mat_SeqAIJ *)(ptap->symAP)->data;
415   int            *api=ap->i,*apj=ap->j,apj_nextap;
416 
417   PetscFunctionBegin;
418   /* Allocate temporary array for storage of one row of A*P */
419   ierr = PetscMalloc(cn*sizeof(MatScalar),&apa);CHKERRQ(ierr);
420   ierr = PetscMemzero(apa,cn*sizeof(MatScalar));CHKERRQ(ierr);
421 
422   /* Clear old values in C */
423   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
424 
425   for (i=0;i<am;i++) {
426     /* Get sparse values of A*P[i,:] */
427     anzi  = ai[i+1] - ai[i];
428     apnzj = 0;
429     for (j=0;j<anzi;j++) {
430       prow = *aj++;
431       pnzj = pi[prow+1] - pi[prow];
432       pjj  = pj + pi[prow];
433       paj  = pa + pi[prow];
434       for (k=0;k<pnzj;k++) {
435         apa[pjj[k]] += (*aa)*paj[k];
436       }
437       flops += 2*pnzj;
438       aa++;
439     }
440 
441     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
442     apj   = ap->j + api[i];
443     apnzj = api[i+1] - api[i];
444     pnzi  = pi[i+1] - pi[i];
445     for (j=0;j<pnzi;j++) {
446       nextap = 0;
447       crow   = *pJ++;
448       cjj    = cj + ci[crow];
449       caj    = ca + ci[crow];
450       /* Perform sparse axpy operation.  Note cjj includes apj. */
451       for (k=0; nextap<apnzj; k++) {
452         apj_nextap = *(apj+nextap);
453         if (cjj[k]==apj_nextap) {
454           caj[k] += (*pA)*apa[apj_nextap];
455           nextap++;
456         }
457       }
458       flops += 2*apnzj;
459       pA++;
460     }
461 
462     /* Zero the current row values for A*P */
463     for (j=0;j<apnzj;j++) apa[apj[j]] = 0.0;
464   }
465 
466   /* Assemble the final matrix and clean up */
467   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469   ierr = PetscFree(apa);CHKERRQ(ierr);
470   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
471 
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "RegisterPtAPRoutines_Private"
477 PetscErrorCode RegisterPtAPRoutines_Private(Mat A)
478 {
479   PetscErrorCode ierr;
480 
481   PetscFunctionBegin;
482   ierr = RegisterMatMatMultRoutines_Private(A);CHKERRQ(ierr);
483 
484   PetscFunctionReturn(0);
485 }
486