xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 7f79fd589f0cc333cb4f63ed4d995034cf8282ff)
1 /*
2   Defines matrix-matrix product routines for pairs of AIJ matrices
3           C = A * B
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
7 #include "src/mat/utils/freespace.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #include "petscbt.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatMatMult"
13 /*@
14    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
15 
16    Collective on Mat
17 
18    Input Parameters:
19 +  A - the left matrix
20 .  B - the right matrix
21 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
22 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
23 
24    Output Parameters:
25 .  C - the product matrix
26 
27    Notes:
28    C will be created and must be destroyed by the user with MatDestroy().
29 
30    This routine is currently only implemented for pairs of AIJ matrices and classes
31    which inherit from AIJ.  C will be of type MATAIJ.
32 
33    Level: intermediate
34 
35 .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
36 @*/
37 PetscErrorCode MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
38 {
39   PetscErrorCode ierr;
40   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
41   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
42 
43   PetscFunctionBegin;
44   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
45   PetscValidType(A,1);
46   MatPreallocated(A);
47   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
48   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
49   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
50   PetscValidType(B,2);
51   MatPreallocated(B);
52   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
53   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
54   PetscValidPointer(C,3);
55   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
56 
57   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
58 
59   /* For now, we do not dispatch based on the type of A and B */
60   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
61   fA = A->ops->matmult;
62   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for A of type %s",A->type_name);
63   fB = B->ops->matmult;
64   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMult not supported for B of type %s",B->type_name);
65   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMult requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
66 
67   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
68   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
69   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
70 
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
76 PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (scall == MAT_INITIAL_MATRIX){
82     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
83   } else if (scall == MAT_REUSE_MATRIX){
84     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
85   } else {
86     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
93 PetscErrorCode MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
94   PetscErrorCode ierr;
95 
96   PetscFunctionBegin;
97   if (scall == MAT_INITIAL_MATRIX){
98     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
99   }
100   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatMatMultSymbolic"
106 /*@
107    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
108    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
109 
110    Collective on Mat
111 
112    Input Parameters:
113 +  A - the left matrix
114 .  B - the right matrix
115 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
116 
117    Output Parameters:
118 .  C - the matrix containing the ij structure of product matrix
119 
120    Notes:
121    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
122 
123    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
124 
125    Level: intermediate
126 
127 .seealso: MatMatMult(),MatMatMultNumeric()
128 @*/
129 PetscErrorCode MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
130   PetscErrorCode ierr;
131   PetscErrorCode (*Asymbolic)(Mat,Mat,PetscReal,Mat *);
132   PetscErrorCode (*Bsymbolic)(Mat,Mat,PetscReal,Mat *);
133 
134   PetscFunctionBegin;
135   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
136   PetscValidType(A,1);
137   MatPreallocated(A);
138   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
139   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
140 
141   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
142   PetscValidType(B,2);
143   MatPreallocated(B);
144   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
145   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
146   PetscValidPointer(C,3);
147 
148   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
149   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
150 
151   /* For now, we do not dispatch based on the type of A and P */
152   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
153   Asymbolic = A->ops->matmultsymbolic;
154   if (!Asymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
155   Bsymbolic = B->ops->matmultsymbolic;
156   if (!Bsymbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
157   if (Bsymbolic!=Asymbolic) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultSymbolic requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
158 
159   ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
160   ierr = (*Asymbolic)(A,B,fill,C);CHKERRQ(ierr);
161   ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
162 
163   PetscFunctionReturn(0);
164 }
165 
166 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
169 PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A)
170 {
171   PetscErrorCode       ierr;
172   Mat_MatMatMultMPI    *mult;
173   PetscObjectContainer container;
174 
175   PetscFunctionBegin;
176   ierr = PetscObjectQuery((PetscObject)A,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
177   if (container) {
178     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
179     ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
180     ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
181     ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
182     ierr = MatDestroy(mult->A_loc);CHKERRQ(ierr);
183     ierr = MatDestroy(mult->B_seq);CHKERRQ(ierr);
184     ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
185 
186     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
187     ierr = PetscObjectCompose((PetscObject)A,"MatMatMultMPI",0);CHKERRQ(ierr);
188   }
189   ierr = PetscFree(mult);CHKERRQ(ierr);
190   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
191 
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
197 PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
198 {
199   Mat_MPIAIJ           *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
200   PetscErrorCode       ierr;
201   int                  start,end;
202   Mat_MatMatMultMPI    *mult;
203   PetscObjectContainer container;
204 
205   PetscFunctionBegin;
206   if (a->cstart != b->rstart || a->cend != b->rend){
207     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
208   }
209   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
210 
211   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
212   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&mult->isrowb,&mult->iscolb,&mult->brstart,&mult->B_seq);CHKERRQ(ierr);
213 
214   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
215   start = a->rstart; end = a->rend;
216   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
217   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&mult->isrowa,&mult->isrowb,&mult->A_loc);CHKERRQ(ierr);
218 
219   /* compute C_seq = A_seq * B_seq */
220   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
221 
222   /* create mpi matrix C by concatinating C_seq */
223   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
224   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
225 
226   /* attach the supporting struct to C for reuse of symbolic C */
227   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
228   ierr = PetscObjectContainerSetPointer(container,mult);CHKERRQ(ierr);
229   ierr = PetscObjectCompose((PetscObject)(*C),"MatMatMultMPI",(PetscObject)container);CHKERRQ(ierr);
230 
231   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
232 
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
238 PetscErrorCode MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
239 {
240   PetscErrorCode ierr;
241   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
242   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
243   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci,*cj;
244   int            am=A->M,bn=B->N,bm=B->M;
245   int            i,j,anzi,brow,bnzj,cnzi,nlnk,*lnk,nspacedouble=0;
246   MatScalar      *ca;
247   PetscBT        lnkbt;
248 
249   PetscFunctionBegin;
250   /* Set up */
251   /* Allocate ci array, arrays for fill computation and */
252   /* free space for accumulating nonzero column info */
253   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
254   ci[0] = 0;
255 
256   /* create and initialize a linked list */
257   nlnk = bn+1;
258   ierr = PetscLLCreate(bn,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
259 
260   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
261   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
262   current_space = free_space;
263 
264   /* Determine symbolic info for each row of the product: */
265   for (i=0;i<am;i++) {
266     anzi = ai[i+1] - ai[i];
267     cnzi = 0;
268     j    = anzi;
269     aj   = a->j + ai[i];
270     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
271       j--;
272       brow = *(aj + j);
273       bnzj = bi[brow+1] - bi[brow];
274       bjj  = bj + bi[brow];
275       /* add non-zero cols of B into the sorted linked list lnk */
276       ierr = PetscLLAdd(bnzj,bjj,bn,nlnk,lnk,lnkbt);CHKERRQ(ierr);
277       cnzi += nlnk;
278     }
279 
280     /* If free space is not available, make more free space */
281     /* Double the amount of total space in the list */
282     if (current_space->local_remaining<cnzi) {
283       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
284       nspacedouble++;
285     }
286 
287     /* Copy data into free space, then initialize lnk */
288     ierr = PetscLLClean(bn,bn,cnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
289     current_space->array           += cnzi;
290     current_space->local_used      += cnzi;
291     current_space->local_remaining -= cnzi;
292 
293     ci[i+1] = ci[i] + cnzi;
294   }
295 
296   /* Column indices are in the list of free space */
297   /* Allocate space for cj, initialize cj, and */
298   /* destroy list of free space and other temporary array(s) */
299   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
300   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
301   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
302 
303   /* Allocate space for ca */
304   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
305   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
306 
307   /* put together the new symbolic matrix */
308   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
309 
310   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
311   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
312   c = (Mat_SeqAIJ *)((*C)->data);
313   c->freedata = PETSC_TRUE;
314   c->nonew    = 0;
315 
316   if (nspacedouble){
317     PetscLogInfo((PetscObject)(*C),"MatMatMultSymbolic_SeqAIJ_SeqAIJ: nspacedouble:%d, nnz(A):%d, nnz(B):%d, fill:%g, nnz(C):%d\n",nspacedouble,ai[am],bi[bm],fill,ci[am]);
318   }
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatMatMultNumeric"
324 /*@
325    MatMatMultNumeric - Performs the numeric matrix-matrix product.
326    Call this routine after first calling MatMatMultSymbolic().
327 
328    Collective on Mat
329 
330    Input Parameters:
331 +  A - the left matrix
332 -  B - the right matrix
333 
334    Output Parameters:
335 .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
336 
337    Notes:
338    C must have been created with MatMatMultSymbolic.
339 
340    This routine is currently only implemented for SeqAIJ type matrices.
341 
342    Level: intermediate
343 
344 .seealso: MatMatMult(),MatMatMultSymbolic()
345 @*/
346 PetscErrorCode MatMatMultNumeric(Mat A,Mat B,Mat C){
347   PetscErrorCode ierr;
348   PetscErrorCode (*Anumeric)(Mat,Mat,Mat);
349   PetscErrorCode (*Bnumeric)(Mat,Mat,Mat);
350 
351   PetscFunctionBegin;
352 
353   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
354   PetscValidType(A,1);
355   MatPreallocated(A);
356   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
357   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
358 
359   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
360   PetscValidType(B,2);
361   MatPreallocated(B);
362   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
363   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
364 
365   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
366   PetscValidType(C,3);
367   MatPreallocated(C);
368   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
369   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
370 
371   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
372   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
373   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
374 
375   /* For now, we do not dispatch based on the type of A and B */
376   /* When implementations like _SeqAIJ_MAIJ exist, attack the multiple dispatch problem. */
377   Anumeric = A->ops->matmultnumeric;
378   if (!Anumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for A of type %s",A->type_name);
379   Bnumeric = B->ops->matmultnumeric;
380   if (!Bnumeric) SETERRQ1(PETSC_ERR_SUP,"MatMatMultNumeric not supported for B of type %s",B->type_name);
381   if (Bnumeric!=Anumeric) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultNumeric requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
382 
383   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
384   ierr = (*Anumeric)(A,B,C);CHKERRQ(ierr);
385   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 /* This routine is called ONLY in the case of reusing previously computed symbolic C */
391 #undef __FUNCT__
392 #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
393 PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
394 {
395   PetscErrorCode       ierr;
396   Mat                  *seq;
397   Mat_MatMatMultMPI    *mult;
398   PetscObjectContainer container;
399 
400   PetscFunctionBegin;
401   ierr = PetscObjectQuery((PetscObject)C,"MatMatMultMPI",(PetscObject *)&container);CHKERRQ(ierr);
402   if (container) {
403     ierr  = PetscObjectContainerGetPointer(container,(void **)&mult);CHKERRQ(ierr);
404   }
405 
406   seq = &mult->B_seq;
407   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
408   mult->B_seq = *seq;
409 
410   seq = &mult->A_loc;
411   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&seq);CHKERRQ(ierr);
412   mult->A_loc = *seq;
413 
414   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->A_loc,mult->B_seq,MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
415 
416   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
417   ierr = MatMerge(A->comm,mult->C_seq,B->n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
418 
419   PetscFunctionReturn(0);
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
424 PetscErrorCode MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
425 {
426   PetscErrorCode ierr;
427   int        flops=0;
428   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
429   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
430   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
431   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
432   int        am=A->M,cn=C->N;
433   int        i,j,k,anzi,bnzi,cnzi,brow;
434   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
435 
436   PetscFunctionBegin;
437 
438   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
439   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
440   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
441   /* Traverse A row-wise. */
442   /* Build the ith row in C by summing over nonzero columns in A, */
443   /* the rows of B corresponding to nonzeros of A. */
444   for (i=0;i<am;i++) {
445     anzi = ai[i+1] - ai[i];
446     for (j=0;j<anzi;j++) {
447       brow = *aj++;
448       bnzi = bi[brow+1] - bi[brow];
449       bjj  = bj + bi[brow];
450       baj  = ba + bi[brow];
451       for (k=0;k<bnzi;k++) {
452         temp[bjj[k]] += (*aa)*baj[k];
453       }
454       flops += 2*bnzi;
455       aa++;
456     }
457     /* Store row back into C, and re-zero temp */
458     cnzi = ci[i+1] - ci[i];
459     for (j=0;j<cnzi;j++) {
460       ca[j] = temp[cj[j]];
461       temp[cj[j]] = 0.0;
462     }
463     ca += cnzi;
464     cj += cnzi;
465   }
466   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468 
469   /* Free temp */
470   ierr = PetscFree(temp);CHKERRQ(ierr);
471   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatMatMultTranspose"
477 /*@
478    MatMatMultTranspose - Performs Matrix-Matrix Multiplication C=A^T*B.
479 
480    Collective on Mat
481 
482    Input Parameters:
483 +  A - the left matrix
484 .  B - the right matrix
485 .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
486 -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
487 
488    Output Parameters:
489 .  C - the product matrix
490 
491    Notes:
492    C will be created and must be destroyed by the user with MatDestroy().
493 
494    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
495    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
496 
497    Level: intermediate
498 
499 .seealso: MatMatMultTransposeSymbolic(),MatMatMultTransposeNumeric()
500 @*/
501 PetscErrorCode MatMatMultTranspose(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
502 {
503   PetscErrorCode ierr;
504   PetscErrorCode (*fA)(Mat,Mat,MatReuse,PetscReal,Mat*);
505   PetscErrorCode (*fB)(Mat,Mat,MatReuse,PetscReal,Mat*);
506 
507   PetscFunctionBegin;
508   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
509   PetscValidType(A,1);
510   MatPreallocated(A);
511   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
512   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
513   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
514   PetscValidType(B,2);
515   MatPreallocated(B);
516   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
517   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
518   PetscValidPointer(C,3);
519   if (B->M!=A->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->M);
520 
521   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
522 
523   fA = A->ops->matmulttranspose;
524   if (!fA) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for A of type %s",A->type_name);
525   fB = B->ops->matmulttranspose;
526   if (!fB) SETERRQ1(PETSC_ERR_SUP,"MatMatMultTranspose not supported for B of type %s",B->type_name);
527   if (fB!=fA) SETERRQ2(PETSC_ERR_ARG_INCOMP,"MatMatMultTranspose requires A, %s, to be compatible with B, %s",A->type_name,B->type_name);
528 
529   ierr = PetscLogEventBegin(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
530   ierr = (*A->ops->matmulttranspose)(A,B,scall,fill,C);CHKERRQ(ierr);
531   ierr = PetscLogEventEnd(MAT_MatMultTranspose,A,B,0,0);CHKERRQ(ierr);
532 
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "MatMatMultTranspose_SeqAIJ_SeqAIJ"
538 PetscErrorCode MatMatMultTranspose_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542   if (scall == MAT_INITIAL_MATRIX){
543     ierr = MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
544   }
545   ierr = MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ"
551 PetscErrorCode MatMatMultTransposeSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
552 {
553   PetscErrorCode ierr;
554   Mat            At;
555   int            *ati,*atj;
556 
557   PetscFunctionBegin;
558   /* create symbolic At */
559   ierr = MatGetSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
560   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,A->n,A->m,ati,atj,PETSC_NULL,&At);CHKERRQ(ierr);
561 
562   /* get symbolic C=At*B */
563   ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(At,B,fill,C);CHKERRQ(ierr);
564 
565   /* clean up */
566   ierr = MatDestroy(At);CHKERRQ(ierr);
567   ierr = MatRestoreSymbolicTranspose_SeqAIJ(A,&ati,&atj);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ"
574 PetscErrorCode MatMatMultTransposeNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
575 {
576   PetscErrorCode ierr;
577   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c=(Mat_SeqAIJ*)C->data;
578   int            am=A->m,anzi,*ai=a->i,*aj=a->j,*bi=b->i,*bj,bnzi,nextb;
579   int            cm=C->m,*ci=c->i,*cj=c->j,crow,*cjj,i,j,k,flops=0;
580   MatScalar      *aa=a->a,*ba,*ca=c->a,*caj;
581 
582   PetscFunctionBegin;
583   /* clear old values in C */
584   ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr);
585 
586   /* compute A^T*B using outer product (A^T)[:,i]*B[i,:] */
587   for (i=0;i<am;i++) {
588     bj   = b->j + bi[i];
589     ba   = b->a + bi[i];
590     bnzi = bi[i+1] - bi[i];
591     anzi = ai[i+1] - ai[i];
592     for (j=0; j<anzi; j++) {
593       nextb = 0;
594       crow  = *aj++;
595       cjj   = cj + ci[crow];
596       caj   = ca + ci[crow];
597       /* perform sparse axpy operation.  Note cjj includes bj. */
598       for (k=0; nextb<bnzi; k++) {
599         if (cjj[k] == *(bj+nextb)) { /* ccol == bcol */
600           caj[k] += (*aa)*(*(ba+nextb));
601           nextb++;
602         }
603       }
604       flops += 2*bnzi;
605       aa++;
606     }
607   }
608 
609   /* Assemble the final matrix and clean up */
610   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
611   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
612   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
613   PetscFunctionReturn(0);
614 }
615