xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision f747e1ae54eac4c971caa6b7e82576b5c3be8258)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 typedef struct { /* used by MatMatMult_MPIAIJ_MPIAIJ for reusing symbolic mat product */
11   IS     isrowa,isrowb,iscolb;
12   Mat    *aseq,*bseq,C_seq;
13 } Mat_MatMatMultMPI;
14 
15 static int logkey_matmatmult_symbolic = 0;
16 static int logkey_matmatmult_numeric  = 0;
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "MatMatMult"
20 /*@
21    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
22 
23    Collective on Mat
24 
25    Input Parameters:
26 +  A - the left matrix
27 .  B - the right matrix
28 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
29 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
30 
31    Output Parameters:
32 .  C - the product matrix
33 
34    Notes:
35    C will be created and must be destroyed by the user with MatDestroy().
36 
37    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
38    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
39 
40    Level: intermediate
41 
42 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
43 @*/
44 int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
45 {
46   int  ierr;
47 
48   PetscFunctionBegin;
49   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
50   PetscValidType(A,1);
51   MatPreallocated(A);
52   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
55   PetscValidType(B,2);
56   MatPreallocated(B);
57   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
58   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
59   PetscValidPointer(C,3);
60   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
61 
62   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
63 
64   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
65   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
66   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
67 
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
73 int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
74 {
75   int ierr;
76 
77   PetscFunctionBegin;
78   if (scall == MAT_INITIAL_MATRIX){
79     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
80   } else if (scall == MAT_REUSE_MATRIX){
81     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
82   } else {
83     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
90 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
91   int ierr;
92 
93   PetscFunctionBegin;
94   if (scall == MAT_INITIAL_MATRIX){
95     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
96   }
97   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatMatMultSymbolic"
103 /*@
104    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
105    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
106 
107    Collective on Mat
108 
109    Input Parameters:
110 +  A - the left matrix
111 .  B - the right matrix
112 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
113 
114    Output Parameters:
115 .  C - the matrix containing the ij structure of product matrix
116 
117    Notes:
118    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
119 
120    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
121 
122    Level: intermediate
123 
124 .seealso: MatMatMult(),MatMatMultNumeric()
125 @*/
126 int MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
127   /* Perhaps this "interface" routine should be moved into the interface directory.*/
128   /* To facilitate implementations with varying types, QueryFunction is used.*/
129   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
130   int  ierr;
131   char symfunct[80];
132   int  (*symbolic)(Mat,Mat,PetscReal,Mat *);
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136   PetscValidType(A,1);
137   MatPreallocated(A);
138   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
139   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
140 
141   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142   PetscValidType(B,2);
143   MatPreallocated(B);
144   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
145   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
146   PetscValidPointer(C,3);
147 
148   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
149   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
150 
151   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
152   /* When other implementations exist, attack the multiple dispatch problem. */
153   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
154   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
155   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
156   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
157   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
158   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
159 
160   PetscFunctionReturn(0);
161 }
162 
163 EXTERN int MatDestroy_MPIAIJ(Mat);
164 #undef __FUNCT__
165 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
166 int MatDestroy_MPIAIJ_MatMatMult(Mat A)
167 {
168   int               ierr;
169   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
170 
171   PetscFunctionBegin;
172   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
173   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
174   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
175   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
176   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
177   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
178   ierr = PetscFree(mult);CHKERRQ(ierr);
179 
180   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
181 
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
187 int MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
188 {
189   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
190   int               ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap;
191   Mat_MatMatMultMPI *mult;
192 
193   PetscFunctionBegin;
194   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
195 
196   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
197   start = a->cstart;
198   cmap  = a->garray;
199   nzA   = a->A->n;
200   nzB   = a->B->n;
201   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
202   ncols = 0;
203   for (i=0; i<nzB; i++) {
204     if (cmap[i] < start) idx[ncols++] = cmap[i];
205     else break;
206   }
207   imark = i;
208   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
209   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
210   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
211   ierr = PetscFree(idx);CHKERRQ(ierr);
212   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
213   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr)
214 
215   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
216   start = a->rstart; end = a->rend;
217   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
218   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
219 
220   /* compute C_seq = A_seq * B_seq */
221   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
222 
223   /* create mpi matrix C by concatinating C_seq */
224   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
225   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
226 
227   /* attach the supporting struct to C for reuse of symbolic C */
228   (*C)->spptr         = (void*)mult;
229   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
230 
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
236 int MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
237 {
238   int            ierr;
239   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
240   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
241   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
242   int            *ci,*cj,*lnk;
243   int            am=A->M,bn=B->N,bm=B->M;
244   int            i,j,anzi,brow,bnzj,cnzi,nlnk,lnk_init=-1,nspacedouble=0;
245   MatScalar      *ca;
246 
247   PetscFunctionBegin;
248   /* Start timers */
249   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
250   /* Set up */
251   /* Allocate ci array, arrays for fill computation and */
252   /* free space for accumulating nonzero column info */
253   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
254   ci[0] = 0;
255 
256   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
257   nlnk = bn+1;
258   ierr = PetscLLInitialize(lnk_init,nlnk,lnk);CHKERRQ(ierr);
259 
260   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
261   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
262   current_space = free_space;
263 
264   /* Determine symbolic info for each row of the product: */
265   for (i=0;i<am;i++) {
266     anzi = ai[i+1] - ai[i];
267     cnzi = 0;
268     lnk[bn] = bn;
269     j       = anzi;
270     aj      = a->j + ai[i];
271     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
272       j--;
273       brow = *(aj + j);
274       bnzj = bi[brow+1] - bi[brow];
275       bjj  = bj + bi[brow];
276       /* add non-zero cols of B into the sorted linked list lnk */
277       ierr = PetscLLAdd(bnzj,bjj,bn,lnk_init,nlnk,lnk);CHKERRQ(ierr);
278       cnzi += nlnk;
279     }
280 
281     /* If free space is not available, make more free space */
282     /* Double the amount of total space in the list */
283     if (current_space->local_remaining<cnzi) {
284       /* printf("MatMatMultSymbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);*/
285       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
286       nspacedouble++;
287     }
288 
289     /* Copy data into free space, then initialize lnk */
290     ierr = PetscLLClear(bn,lnk_init,cnzi,lnk,current_space->array);CHKERRQ(ierr);
291     current_space->array += cnzi;
292 
293     current_space->local_used      += cnzi;
294     current_space->local_remaining -= cnzi;
295 
296     ci[i+1] = ci[i] + cnzi;
297   }
298 
299   /* Column indices are in the list of free space */
300   /* Allocate space for cj, initialize cj, and */
301   /* destroy list of free space and other temporary array(s) */
302   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
303   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
304   ierr = PetscFree(lnk);CHKERRQ(ierr);
305 
306   /* Allocate space for ca */
307   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
308   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
309 
310   /* put together the new symbolic matrix */
311   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
312 
313   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
314   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
315   c = (Mat_SeqAIJ *)((*C)->data);
316   c->freedata = PETSC_TRUE;
317   c->nonew    = 0;
318 
319   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
320   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMatMultNumeric"
326 /*@
327    MatMatMultNumeric - Performs the numeric matrix-matrix product.
328    Call this routine after first calling MatMatMultSymbolic().
329 
330    Collective on Mat
331 
332    Input Parameters:
333 +  A - the left matrix
334 -  B - the right matrix
335 
336    Output Parameters:
337 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
338 
339    Notes:
340    C must have been created with MatMatMultSymbolic.
341 
342    This routine is currently only implemented for SeqAIJ type matrices.
343 
344    Level: intermediate
345 
346 .seealso: MatMatMult(),MatMatMultSymbolic()
347 @*/
348 int MatMatMultNumeric(Mat A,Mat B,Mat C){
349   /* Perhaps this "interface" routine should be moved into the interface directory.*/
350   /* To facilitate implementations with varying types, QueryFunction is used.*/
351   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
352   int ierr;
353   char numfunct[80];
354   int (*numeric)(Mat,Mat,Mat);
355 
356   PetscFunctionBegin;
357 
358   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
359   PetscValidType(A,1);
360   MatPreallocated(A);
361   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
362   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
363 
364   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
365   PetscValidType(B,2);
366   MatPreallocated(B);
367   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
368   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
369 
370   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
371   PetscValidType(C,3);
372   MatPreallocated(C);
373   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
374   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
375 
376   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
377   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
378   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
379 
380   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
381   /* When other implementations exist, attack the multiple dispatch problem. */
382   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
383   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
384   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
385   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
386   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
387 
388   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
389 
390   PetscFunctionReturn(0);
391 }
392 
393 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
394 #undef __FUNCT__
395 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
396 int MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
397 {
398   int               ierr;
399   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
400 
401   PetscFunctionBegin;
402   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
403   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
404   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
405 
406   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
407   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
408 
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
414 int MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
415 {
416   int        ierr,flops=0;
417   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
418   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
419   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
420   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
421   int        am=A->M,cn=C->N;
422   int        i,j,k,anzi,bnzi,cnzi,brow;
423   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
424 
425   PetscFunctionBegin;
426 
427   /* Start timers */
428   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
429 
430   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
431   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
432   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
433   /* Traverse A row-wise. */
434   /* Build the ith row in C by summing over nonzero columns in A, */
435   /* the rows of B corresponding to nonzeros of A. */
436   for (i=0;i<am;i++) {
437     anzi = ai[i+1] - ai[i];
438     for (j=0;j<anzi;j++) {
439       brow = *aj++;
440       bnzi = bi[brow+1] - bi[brow];
441       bjj  = bj + bi[brow];
442       baj  = ba + bi[brow];
443       for (k=0;k<bnzi;k++) {
444         temp[bjj[k]] += (*aa)*baj[k];
445       }
446       flops += 2*bnzi;
447       aa++;
448     }
449     /* Store row back into C, and re-zero temp */
450     cnzi = ci[i+1] - ci[i];
451     for (j=0;j<cnzi;j++) {
452       ca[j] = temp[cj[j]];
453       temp[cj[j]] = 0.0;
454     }
455     ca += cnzi;
456     cj += cnzi;
457   }
458   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460 
461   /* Free temp */
462   ierr = PetscFree(temp);CHKERRQ(ierr);
463   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
464   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
470 int RegisterMatMatMultRoutines_Private(Mat A) {
471   int ierr;
472 
473   PetscFunctionBegin;
474   if (!logkey_matmatmult_symbolic) {
475     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMultSymbolic",MAT_COOKIE);CHKERRQ(ierr);
476   }
477   if (!logkey_matmatmult_numeric) {
478     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMultNumeric",MAT_COOKIE);CHKERRQ(ierr);
479   }
480 
481   PetscFunctionReturn(0);
482 }
483