xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 6d835ef5ffe513362f885ca78532dab451c155fe)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 
10 /*
11   Initialize a linked list
12   Input Parameters:
13     lnk_init  - the initial index value indicating the entry in the list is not set yet
14     nlnk      - max length of the list
15     lnk       - linked list(an integer array) that is allocated
16   output Parameters:
17     lnk       - the linked list with all values set as lnk_int
18 */
19 #define LNKLISTINITIALIZE(lnk_init,nlnk,lnk){\
20   int _i;\
21   for (_i=0; _i<nlnk; _i++) lnk[_i] = lnk_init;\
22 }
23 
24 /*
25   Add a index set into a sorted linked list
26   Input Parameters:
27     nidx      - number of input indices
28     indices   - interger array
29     lnk_head  - the header of the list
30     lnk_init  - the initial index value indicating the entry in the list is not set yet
31     lnk       - linked list(an integer array) that is created
32   output Parameters:
33     nlnk      - number of newly added indices
34     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
35 */
36 #define LNKLISTADD(nidx,indices,lnk_head,lnk_init,nlnk,lnk){\
37   int _k,_entry,_lidx=lnk_head,_idx;\
38   nlnk = 0;\
39   _k=nidx;\
40   while (_k){/* assume indices are almost in increasing order, starting from its end saves computation */\
41     _entry = indices[--_k];\
42     if (lnk[_entry] == lnk_init) { /* new col */\
43       do {\
44         _idx = _lidx;\
45         _lidx  = lnk[_idx];\
46       } while (_entry > _lidx);\
47       lnk[_idx] = _entry;\
48       lnk[_entry] = _lidx;\
49       nlnk++;\
50     }\
51   }\
52 }
53 /*
54   Copy data on the list into an array, then initialize the list
55   Input Parameters:
56     lnk_head  - the header of the list
57     lnk_init  - the initial index value indicating the entry in the list is not set yet
58     nlnk      - number of data on the list to be copied
59     lnk       - linked list
60   output Parameters:
61     indices   - array that contains the copied data
62 */
63 #define LNKLISTCLEAR(lnk_head,lnk_init,nlnk,lnk,indices){\
64   int _j,_idx=lnk_head,_idx0;\
65   for (_j=0; _j<nlnk; _j++){\
66     _idx0 = _idx; _idx = lnk[_idx0];\
67     *(indices+_j) = _idx;\
68     lnk[_idx0] = lnk_init;\
69   }\
70   lnk[_idx] = lnk_init;\
71 }
72 
73 static int logkey_matmatmult_symbolic = 0;
74 static int logkey_matmatmult_numeric  = 0;
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatMatMult"
78 /*@
79    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
80 
81    Collective on Mat
82 
83    Input Parameters:
84 +  A - the left matrix
85 .  B - the right matrix
86 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
87 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
88 
89    Output Parameters:
90 .  C - the product matrix
91 
92    Notes:
93    C will be created and must be destroyed by the user with MatDestroy().
94 
95    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
96    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
97 
98    Level: intermediate
99 
100 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
101 @*/
102 int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
103 {
104   int  ierr;
105 
106   PetscFunctionBegin;
107   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
108   PetscValidType(A,1);
109   MatPreallocated(A);
110   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
111   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
112   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
113   PetscValidType(B,2);
114   MatPreallocated(B);
115   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
116   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
117   PetscValidPointer(C,3);
118   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
119 
120   if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill);
121 
122   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
123   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
124   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
125 
126   PetscFunctionReturn(0);
127 }
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
131 int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
132 {
133   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,C_seq;
134   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
135   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap;
136   IS            isrow,iscol;
137 
138   PetscFunctionBegin;
139   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
140   start = a->cstart;
141   cmap  = a->garray;
142   nzA   = a->A->n;
143   nzB   = a->B->n;
144   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
145   ncols = 0;
146   for (i=0; i<nzB; i++) {
147     if (cmap[i] < start) idx[ncols++] = cmap[i];
148     else break;
149   }
150   imark = i;
151   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
152   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
153   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
154   ierr = PetscFree(idx);CHKERRQ(ierr);
155   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
156   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,scall,&bseq);CHKERRQ(ierr);
157   ierr = ISDestroy(iscol);CHKERRQ(ierr);
158   B_seq = bseq[0];
159 
160   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
161   start = a->rstart; end = a->rend;
162   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
163   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,scall,&aseq);CHKERRQ(ierr);
164   ierr = ISDestroy(isrow);CHKERRQ(ierr);
165   ierr = ISDestroy(iscol);CHKERRQ(ierr);
166   A_seq = aseq[0];
167 
168   /* compute C_seq = A_seq * B_seq */
169   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq,B_seq,scall,fill,&C_seq);CHKERRQ(ierr);
170   /*
171   int rank;
172   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
173   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
174   */
175   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
176   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
177 
178   /* create mpi matrix C by concatinating C_seq */
179   ierr = MatMerge(A->comm,C_seq,C);CHKERRQ(ierr);
180 
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
186 int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
187   int ierr;
188   char symfunct[80],numfunct[80];
189   int (*symbolic)(Mat,Mat,PetscReal,Mat*),(*numeric)(Mat,Mat,Mat);
190 
191   PetscFunctionBegin;
192   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
193   /* When other implementations exist, attack the multiple dispatch problem. */
194   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
195   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
196   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
197   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
198   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
199 
200   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
201   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
202   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
203   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
204   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
205 
206   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
207   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMatMultSymbolic"
213 /*@
214    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
215    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
216 
217    Collective on Mat
218 
219    Input Parameters:
220 +  A - the left matrix
221 .  B - the right matrix
222 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
223 
224    Output Parameters:
225 .  C - the matrix containing the ij structure of product matrix
226 
227    Notes:
228    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
229 
230    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
231 
232    Level: intermediate
233 
234 .seealso: MatMatMult(),MatMatMultNumeric()
235 @*/
236 int MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
237   /* Perhaps this "interface" routine should be moved into the interface directory.*/
238   /* To facilitate implementations with varying types, QueryFunction is used.*/
239   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
240   int  ierr;
241   char symfunct[80];
242   int  (*symbolic)(Mat,Mat,PetscReal,Mat *);
243 
244   PetscFunctionBegin;
245   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
246   PetscValidType(A,1);
247   MatPreallocated(A);
248   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
249   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
250 
251   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
252   PetscValidType(B,2);
253   MatPreallocated(B);
254   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
255   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
256   PetscValidPointer(C,3);
257 
258   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
259   if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill);
260 
261   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
262   /* When other implementations exist, attack the multiple dispatch problem. */
263   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
264   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
265   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
266   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
267   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
268   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
269 
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
275 int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
276 {
277   int            ierr;
278   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
279   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
280   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
281   int            *ci,*cj,*lnk,idx0,idx;
282   int            am=A->M,bn=B->N,bm=B->M;
283   int            i,j,anzi,brow,bnzj,cnzi,nlnk,lnk_init=-1,nspacedouble=0;
284   MatScalar      *ca;
285 
286   PetscFunctionBegin;
287   /* Start timers */
288   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
289   /* Set up */
290   /* Allocate ci array, arrays for fill computation and */
291   /* free space for accumulating nonzero column info */
292   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
293   ci[0] = 0;
294 
295   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
296   LNKLISTINITIALIZE(lnk_init,bn,lnk);
297 
298   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
299   ierr = GetMoreSpace(fill*(ai[am]+bi[bm]),&free_space);CHKERRQ(ierr);
300   current_space = free_space;
301 
302   /* Determine symbolic info for each row of the product: */
303   for (i=0;i<am;i++) {
304     anzi = ai[i+1] - ai[i];
305     cnzi = 0;
306     lnk[bn] = bn;
307     j       = anzi;
308     aj      = a->j + ai[i];
309     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
310       j--;
311       brow = *(aj + j);
312       bnzj = bi[brow+1] - bi[brow];
313       bjj  = bj + bi[brow];
314       /* add non-zero cols of B into the sorted linked list lnk */
315       LNKLISTADD(bnzj,bjj,bn,lnk_init,nlnk,lnk);
316       cnzi += nlnk;
317     }
318 
319     /* If free space is not available, make more free space */
320     /* Double the amount of total space in the list */
321     if (current_space->local_remaining<cnzi) {
322       /* printf("MatMatMult_Symbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);*/
323       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
324       nspacedouble++;
325     }
326 
327     /* Copy data into free space, then initialize lnk */
328     LNKLISTCLEAR(bn,lnk_init,cnzi,lnk,current_space->array);
329     current_space->array += cnzi;
330 
331     current_space->local_used      += cnzi;
332     current_space->local_remaining -= cnzi;
333 
334     ci[i+1] = ci[i] + cnzi;
335   }
336 
337   /* Column indices are in the list of free space */
338   /* Allocate space for cj, initialize cj, and */
339   /* destroy list of free space and other temporary array(s) */
340   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
341   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
342   ierr = PetscFree(lnk);CHKERRQ(ierr);
343 
344   /* Allocate space for ca */
345   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
346   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
347 
348   /* put together the new matrix */
349   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
350 
351   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
352   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
353   c = (Mat_SeqAIJ *)((*C)->data);
354   c->freedata = PETSC_TRUE;
355   c->nonew    = 0;
356 
357   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
358   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatMatMultNumeric"
364 /*@
365    MatMatMultNumeric - Performs the numeric matrix-matrix product.
366    Call this routine after first calling MatMatMultSymbolic().
367 
368    Collective on Mat
369 
370    Input Parameters:
371 +  A - the left matrix
372 -  B - the right matrix
373 
374    Output Parameters:
375 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
376 
377    Notes:
378    C must have been created with MatMatMultSymbolic.
379 
380    This routine is currently only implemented for SeqAIJ type matrices.
381 
382    Level: intermediate
383 
384 .seealso: MatMatMult(),MatMatMultSymbolic()
385 @*/
386 int MatMatMultNumeric(Mat A,Mat B,Mat C){
387   /* Perhaps this "interface" routine should be moved into the interface directory.*/
388   /* To facilitate implementations with varying types, QueryFunction is used.*/
389   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
390   int ierr;
391   char numfunct[80];
392   int (*numeric)(Mat,Mat,Mat);
393 
394   PetscFunctionBegin;
395 
396   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
397   PetscValidType(A,1);
398   MatPreallocated(A);
399   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
400   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
401 
402   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
403   PetscValidType(B,2);
404   MatPreallocated(B);
405   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
406   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
407 
408   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
409   PetscValidType(C,3);
410   MatPreallocated(C);
411   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
412   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
413 
414   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
415   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
416   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
417 
418   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
419   /* When other implementations exist, attack the multiple dispatch problem. */
420   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
421   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
422   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
423   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
424   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
425 
426   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
427 
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
433 int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
434 {
435   int        ierr,flops=0;
436   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
437   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
438   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
439   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
440   int        am=A->M,cn=C->N;
441   int        i,j,k,anzi,bnzi,cnzi,brow;
442   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
443 
444   PetscFunctionBegin;
445 
446   /* Start timers */
447   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
448 
449   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
450   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
451   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
452   /* Traverse A row-wise. */
453   /* Build the ith row in C by summing over nonzero columns in A, */
454   /* the rows of B corresponding to nonzeros of A. */
455   for (i=0;i<am;i++) {
456     anzi = ai[i+1] - ai[i];
457     for (j=0;j<anzi;j++) {
458       brow = *aj++;
459       bnzi = bi[brow+1] - bi[brow];
460       bjj  = bj + bi[brow];
461       baj  = ba + bi[brow];
462       for (k=0;k<bnzi;k++) {
463         temp[bjj[k]] += (*aa)*baj[k];
464       }
465       flops += 2*bnzi;
466       aa++;
467     }
468     /* Store row back into C, and re-zero temp */
469     cnzi = ci[i+1] - ci[i];
470     for (j=0;j<cnzi;j++) {
471       ca[j] = temp[cj[j]];
472       temp[cj[j]] = 0.0;
473     }
474     ca += cnzi;
475     cj += cnzi;
476   }
477   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
478   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
479 
480   /* Free temp */
481   ierr = PetscFree(temp);CHKERRQ(ierr);
482   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
483   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
489 int RegisterMatMatMultRoutines_Private(Mat A) {
490   int ierr;
491 
492   PetscFunctionBegin;
493   if (!logkey_matmatmult_symbolic) {
494     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
495   }
496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
497                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
498                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
499   if (!logkey_matmatmult_numeric) {
500     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
501   }
502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
503                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
504                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507