xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision d6bb3c2dee159a50a2e4d6d5d3faf2a787a98be5)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9d50806bdSBarry Smith 
101c239cc6SHong Zhang /*
11c5db241fSHong Zhang   Initialize a linked list
12c5db241fSHong Zhang   Input Parameters:
13c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
14c5db241fSHong Zhang     nlnk      - max length of the list
15c5db241fSHong Zhang     lnk       - linked list(an integer array) that is allocated
16c5db241fSHong Zhang   output Parameters:
17c5db241fSHong Zhang     lnk       - the linked list with all values set as lnk_int
18c5db241fSHong Zhang */
19c5db241fSHong Zhang #define LNKLISTINITIALIZE(lnk_init,nlnk,lnk){\
20c5db241fSHong Zhang   int _i;\
21c5db241fSHong Zhang   for (_i=0; _i<nlnk; _i++) lnk[_i] = lnk_init;\
22c5db241fSHong Zhang }
23c5db241fSHong Zhang 
24c5db241fSHong Zhang /*
251c239cc6SHong Zhang   Add a index set into a sorted linked list
26a50f8bf6SHong Zhang   Input Parameters:
271c239cc6SHong Zhang     nidx      - number of input indices
281c239cc6SHong Zhang     indices   - interger array
29c5db241fSHong Zhang     lnk_head  - the header of the list
30c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
311c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
32a50f8bf6SHong Zhang   output Parameters:
331c239cc6SHong Zhang     nlnk      - number of newly added indices
341c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
351c239cc6SHong Zhang */
36c5db241fSHong Zhang #define LNKLISTADD(nidx,indices,lnk_head,lnk_init,nlnk,lnk){\
37c5db241fSHong Zhang   int _k,_entry,_lidx=lnk_head,_idx;\
38a50f8bf6SHong Zhang   nlnk = 0;\
39a50f8bf6SHong Zhang   _k=nidx;\
40a50f8bf6SHong Zhang   while (_k){/* assume indices are almost in increasing order, starting from its end saves computation */\
41a50f8bf6SHong Zhang     _entry = indices[--_k];\
42c5db241fSHong Zhang     if (lnk[_entry] == lnk_init) { /* new col */\
43a50f8bf6SHong Zhang       do {\
44a50f8bf6SHong Zhang         _idx = _lidx;\
45a50f8bf6SHong Zhang         _lidx  = lnk[_idx];\
46a50f8bf6SHong Zhang       } while (_entry > _lidx);\
47a50f8bf6SHong Zhang       lnk[_idx] = _entry;\
48a50f8bf6SHong Zhang       lnk[_entry] = _lidx;\
49a50f8bf6SHong Zhang       nlnk++;\
50a50f8bf6SHong Zhang     }\
51a50f8bf6SHong Zhang   }\
521c239cc6SHong Zhang }
53c5db241fSHong Zhang /*
54c5db241fSHong Zhang   Copy data on the list into an array, then initialize the list
55c5db241fSHong Zhang   Input Parameters:
56c5db241fSHong Zhang     lnk_head  - the header of the list
57c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
58c5db241fSHong Zhang     nlnk      - number of data on the list to be copied
59c5db241fSHong Zhang     lnk       - linked list
60c5db241fSHong Zhang   output Parameters:
61c5db241fSHong Zhang     indices   - array that contains the copied data
62c5db241fSHong Zhang */
63c5db241fSHong Zhang #define LNKLISTCLEAR(lnk_head,lnk_init,nlnk,lnk,indices){\
64c5db241fSHong Zhang   int _j,_idx=lnk_head,_idx0;\
65c5db241fSHong Zhang   for (_j=0; _j<nlnk; _j++){\
66c5db241fSHong Zhang     _idx0 = _idx; _idx = lnk[_idx0];\
67c5db241fSHong Zhang     *(indices+_j) = _idx;\
68c5db241fSHong Zhang     lnk[_idx0] = lnk_init;\
69c5db241fSHong Zhang   }\
70c5db241fSHong Zhang   lnk[_idx] = lnk_init;\
71c5db241fSHong Zhang }
721c239cc6SHong Zhang 
7326be0446SHong Zhang typedef struct { /* used by MatMatMult_MPIAIJ_MPIAIJ for reusing symbolic mat product */
7426be0446SHong Zhang   IS        isrowa,isrowb,iscolb;
7526be0446SHong Zhang   Mat       *aseq,*bseq,C_seq;
7626be0446SHong Zhang } Mat_MatMatMultMPI;
7726be0446SHong Zhang 
782216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
792216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
802216b3a4SKris Buschelman 
81c1f4806aSKris Buschelman #undef __FUNCT__
82c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
835c66b693SKris Buschelman /*@
845c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
8594e3eecaSKris Buschelman 
865c66b693SKris Buschelman    Collective on Mat
87d50806bdSBarry Smith 
885c66b693SKris Buschelman    Input Parameters:
895c66b693SKris Buschelman +  A - the left matrix
901c741599SHong Zhang .  B - the right matrix
911c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
92c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
935c66b693SKris Buschelman 
945c66b693SKris Buschelman    Output Parameters:
955c66b693SKris Buschelman .  C - the product matrix
965c66b693SKris Buschelman 
975c66b693SKris Buschelman    Notes:
985c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
995c66b693SKris Buschelman 
1004d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
1014d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
1025c66b693SKris Buschelman 
1035c66b693SKris Buschelman    Level: intermediate
1045c66b693SKris Buschelman 
1055c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
1065c66b693SKris Buschelman @*/
1071c741599SHong Zhang int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1082d09714cSHong Zhang {
109d50806bdSBarry Smith   int  ierr;
1102d09714cSHong Zhang 
111d50806bdSBarry Smith   PetscFunctionBegin;
1124482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
113c9780b6fSBarry Smith   PetscValidType(A,1);
1145c66b693SKris Buschelman   MatPreallocated(A);
1155c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1165c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1174482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
118c9780b6fSBarry Smith   PetscValidType(B,2);
1195c66b693SKris Buschelman   MatPreallocated(B);
1205c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1215c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1224482741eSBarry Smith   PetscValidPointer(C,3);
1235c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
124d50806bdSBarry Smith 
1251c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
126c5db241fSHong Zhang 
1276284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1281c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
1296284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1304d3841fdSKris Buschelman 
1316284ec50SHong Zhang   PetscFunctionReturn(0);
1326284ec50SHong Zhang }
1336284ec50SHong Zhang 
1346284ec50SHong Zhang #undef __FUNCT__
1356284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
1361c741599SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
1372d09714cSHong Zhang {
13826be0446SHong Zhang   int ierr;
1396284ec50SHong Zhang 
1406284ec50SHong Zhang   PetscFunctionBegin;
14126be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
142*d6bb3c2dSHong Zhang     ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);/* numeric product is computed as well */
143*d6bb3c2dSHong Zhang   } else if (scall == MAT_REUSE_MATRIX){
14426be0446SHong Zhang     ierr = MatMatMultNumeric_MPIAIJ_MPIAIJ(A,B,*C);CHKERRQ(ierr);
145*d6bb3c2dSHong Zhang   } else {
146*d6bb3c2dSHong Zhang     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
147*d6bb3c2dSHong Zhang   }
148d50806bdSBarry Smith   PetscFunctionReturn(0);
149d50806bdSBarry Smith }
1505c66b693SKris Buschelman 
1515c66b693SKris Buschelman #undef __FUNCT__
1525c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
1531c741599SHong Zhang int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
1545c66b693SKris Buschelman   int ierr;
1555c66b693SKris Buschelman 
1565c66b693SKris Buschelman   PetscFunctionBegin;
15726be0446SHong Zhang   if (scall == MAT_INITIAL_MATRIX){
15826be0446SHong Zhang     ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(A,B,fill,C);CHKERRQ(ierr);
15926be0446SHong Zhang   }
16026be0446SHong Zhang   ierr = MatMatMultNumeric_SeqAIJ_SeqAIJ(A,B,*C);CHKERRQ(ierr);
1615c66b693SKris Buschelman   PetscFunctionReturn(0);
1625c66b693SKris Buschelman }
1635c66b693SKris Buschelman 
164c1f4806aSKris Buschelman #undef __FUNCT__
165c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1665c66b693SKris Buschelman /*@
1675c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1685c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1695c66b693SKris Buschelman 
1705c66b693SKris Buschelman    Collective on Mat
1715c66b693SKris Buschelman 
1725c66b693SKris Buschelman    Input Parameters:
1735c66b693SKris Buschelman +  A - the left matrix
174c5db241fSHong Zhang .  B - the right matrix
175c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
1765c66b693SKris Buschelman 
1775c66b693SKris Buschelman    Output Parameters:
1785c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1795c66b693SKris Buschelman 
1805c66b693SKris Buschelman    Notes:
1814d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1825c66b693SKris Buschelman 
1834d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1845c66b693SKris Buschelman 
1855c66b693SKris Buschelman    Level: intermediate
1865c66b693SKris Buschelman 
1875c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1885c66b693SKris Buschelman @*/
189c5db241fSHong Zhang int MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
1905c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
1915c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
1925c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
1935c66b693SKris Buschelman   int  ierr;
1944d3841fdSKris Buschelman   char symfunct[80];
195c5db241fSHong Zhang   int  (*symbolic)(Mat,Mat,PetscReal,Mat *);
1965c66b693SKris Buschelman 
1975c66b693SKris Buschelman   PetscFunctionBegin;
1984482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
199c9780b6fSBarry Smith   PetscValidType(A,1);
2005c66b693SKris Buschelman   MatPreallocated(A);
2015c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2025c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2035c66b693SKris Buschelman 
2044482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
205c9780b6fSBarry Smith   PetscValidType(B,2);
2065c66b693SKris Buschelman   MatPreallocated(B);
2075c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2085c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2094482741eSBarry Smith   PetscValidPointer(C,3);
2104482741eSBarry Smith 
2115c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
2121c24bd37SHong Zhang   if (fill <=0.0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%g must be > 0.0",fill);
2135c66b693SKris Buschelman 
2144d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2154d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2164d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2174d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2184d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2194d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2204d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
221c5db241fSHong Zhang   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
2225c66b693SKris Buschelman 
2235c66b693SKris Buschelman   PetscFunctionReturn(0);
2245c66b693SKris Buschelman }
2251c24bd37SHong Zhang 
2261c24bd37SHong Zhang EXTERN int MatDestroy_MPIAIJ(Mat);
22726be0446SHong Zhang #undef __FUNCT__
22826be0446SHong Zhang #define __FUNCT__ "MatDestroy_MPIAIJ_MatMatMult"
22926be0446SHong Zhang int MatDestroy_MPIAIJ_MatMatMult(Mat A)
23026be0446SHong Zhang {
23126be0446SHong Zhang   int               ierr;
23226be0446SHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)A->spptr;
23326be0446SHong Zhang 
23426be0446SHong Zhang   PetscFunctionBegin;
235*d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowb);CHKERRQ(ierr);
236*d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->iscolb);CHKERRQ(ierr);
237*d6bb3c2dSHong Zhang   ierr = ISDestroy(mult->isrowa);CHKERRQ(ierr);
238*d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->aseq);CHKERRQ(ierr);
239*d6bb3c2dSHong Zhang   ierr = MatDestroyMatrices(1,&mult->bseq);CHKERRQ(ierr);
240*d6bb3c2dSHong Zhang   ierr = MatDestroy(mult->C_seq);CHKERRQ(ierr);
24126be0446SHong Zhang   ierr = PetscFree(mult);CHKERRQ(ierr);
242*d6bb3c2dSHong Zhang 
24326be0446SHong Zhang   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
24426be0446SHong Zhang 
24526be0446SHong Zhang   PetscFunctionReturn(0);
24626be0446SHong Zhang }
24758c24d83SHong Zhang 
24858c24d83SHong Zhang #undef __FUNCT__
24926be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_MPIAIJ_MPIAIJ"
25026be0446SHong Zhang int MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
25126be0446SHong Zhang {
25226be0446SHong Zhang   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
25326be0446SHong Zhang   int               ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap;
25426be0446SHong Zhang   Mat_MatMatMultMPI *mult;
25526be0446SHong Zhang 
25626be0446SHong Zhang   PetscFunctionBegin;
257*d6bb3c2dSHong Zhang   ierr = PetscNew(Mat_MatMatMultMPI,&mult);CHKERRQ(ierr);
258*d6bb3c2dSHong Zhang 
25926be0446SHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
26026be0446SHong Zhang   start = a->cstart;
26126be0446SHong Zhang   cmap  = a->garray;
26226be0446SHong Zhang   nzA   = a->A->n;
26326be0446SHong Zhang   nzB   = a->B->n;
26426be0446SHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
26526be0446SHong Zhang   ncols = 0;
26626be0446SHong Zhang   for (i=0; i<nzB; i++) {
26726be0446SHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
26826be0446SHong Zhang     else break;
26926be0446SHong Zhang   }
27026be0446SHong Zhang   imark = i;
27126be0446SHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
27226be0446SHong Zhang   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
273*d6bb3c2dSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&mult->isrowb);CHKERRQ(ierr);
27426be0446SHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
275*d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&mult->iscolb);CHKERRQ(ierr);
276*d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_INITIAL_MATRIX,&mult->bseq);CHKERRQ(ierr)
27726be0446SHong Zhang 
27826be0446SHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
27926be0446SHong Zhang   start = a->rstart; end = a->rend;
280*d6bb3c2dSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&mult->isrowa);CHKERRQ(ierr);
281*d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_INITIAL_MATRIX,&mult->aseq);CHKERRQ(ierr);
28226be0446SHong Zhang 
28326be0446SHong Zhang   /* compute C_seq = A_seq * B_seq */
284*d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_INITIAL_MATRIX,fill,&mult->C_seq);CHKERRQ(ierr);
28526be0446SHong Zhang 
28626be0446SHong Zhang   /* create mpi matrix C by concatinating C_seq */
287*d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr); /* prevent C_seq being destroyed by MatMerge() */
288*d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
28926be0446SHong Zhang 
29026be0446SHong Zhang   /* attach the supporting struct to C for reuse of symbolic C */
29126be0446SHong Zhang   (*C)->spptr         = (void*)mult;
29226be0446SHong Zhang   (*C)->ops->destroy  = MatDestroy_MPIAIJ_MatMatMult;
29326be0446SHong Zhang 
29426be0446SHong Zhang   PetscFunctionReturn(0);
29526be0446SHong Zhang }
29626be0446SHong Zhang 
29726be0446SHong Zhang #undef __FUNCT__
29826be0446SHong Zhang #define __FUNCT__ "MatMatMultSymbolic_SeqAIJ_SeqAIJ"
29926be0446SHong Zhang int MatMatMultSymbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
30058c24d83SHong Zhang {
30158c24d83SHong Zhang   int            ierr;
30258c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
30358c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
30458c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
3051c24bd37SHong Zhang   int            *ci,*cj,*lnk;
3065c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
307c5db241fSHong Zhang   int            i,j,anzi,brow,bnzj,cnzi,nlnk,lnk_init=-1,nspacedouble=0;
30858c24d83SHong Zhang   MatScalar      *ca;
30958c24d83SHong Zhang 
31058c24d83SHong Zhang   PetscFunctionBegin;
3115c66b693SKris Buschelman   /* Start timers */
31258c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
31358c24d83SHong Zhang   /* Set up */
31458c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
31558c24d83SHong Zhang   /* free space for accumulating nonzero column info */
31658c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
31758c24d83SHong Zhang   ci[0] = 0;
31858c24d83SHong Zhang 
31958c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
320c5db241fSHong Zhang   LNKLISTINITIALIZE(lnk_init,bn,lnk);
32158c24d83SHong Zhang 
322c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
323*d6bb3c2dSHong Zhang   ierr = GetMoreSpace((int)(fill*(ai[am]+bi[bm])),&free_space);CHKERRQ(ierr);
32458c24d83SHong Zhang   current_space = free_space;
32558c24d83SHong Zhang 
32658c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
32758c24d83SHong Zhang   for (i=0;i<am;i++) {
32858c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
32958c24d83SHong Zhang     cnzi = 0;
33058c24d83SHong Zhang     lnk[bn] = bn;
3312d09714cSHong Zhang     j       = anzi;
3322d09714cSHong Zhang     aj      = a->j + ai[i];
3332d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
3342d09714cSHong Zhang       j--;
3352d09714cSHong Zhang       brow = *(aj + j);
33658c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
33758c24d83SHong Zhang       bjj  = bj + bi[brow];
3381c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
339c5db241fSHong Zhang       LNKLISTADD(bnzj,bjj,bn,lnk_init,nlnk,lnk);
3401c239cc6SHong Zhang       cnzi += nlnk;
34158c24d83SHong Zhang     }
34258c24d83SHong Zhang 
34358c24d83SHong Zhang     /* If free space is not available, make more free space */
34458c24d83SHong Zhang     /* Double the amount of total space in the list */
34558c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
34626be0446SHong Zhang       /* printf("MatMatMultSymbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);*/
34758c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
348c5db241fSHong Zhang       nspacedouble++;
34958c24d83SHong Zhang     }
35058c24d83SHong Zhang 
351c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
352c5db241fSHong Zhang     LNKLISTCLEAR(bn,lnk_init,cnzi,lnk,current_space->array);
353c5db241fSHong Zhang     current_space->array += cnzi;
35458c24d83SHong Zhang 
35558c24d83SHong Zhang     current_space->local_used      += cnzi;
35658c24d83SHong Zhang     current_space->local_remaining -= cnzi;
35758c24d83SHong Zhang 
35858c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
35958c24d83SHong Zhang   }
36058c24d83SHong Zhang 
36158c24d83SHong Zhang   /* Column indices are in the list of free space */
36258c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
36358c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
36458c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
36558c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
36658c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
36758c24d83SHong Zhang 
36858c24d83SHong Zhang   /* Allocate space for ca */
36958c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
37058c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
37158c24d83SHong Zhang 
37226be0446SHong Zhang   /* put together the new symbolic matrix */
37358c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
37458c24d83SHong Zhang 
37558c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
37658c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
37758c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
37858c24d83SHong Zhang   c->freedata = PETSC_TRUE;
37958c24d83SHong Zhang   c->nonew    = 0;
38058c24d83SHong Zhang 
38158c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
382c5db241fSHong Zhang   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
38358c24d83SHong Zhang   PetscFunctionReturn(0);
38458c24d83SHong Zhang }
385d50806bdSBarry Smith 
386c1f4806aSKris Buschelman #undef __FUNCT__
387c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3885c66b693SKris Buschelman /*@
3895c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3905c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3915c66b693SKris Buschelman 
3925c66b693SKris Buschelman    Collective on Mat
3935c66b693SKris Buschelman 
3945c66b693SKris Buschelman    Input Parameters:
3955c66b693SKris Buschelman +  A - the left matrix
3965c66b693SKris Buschelman -  B - the right matrix
3975c66b693SKris Buschelman 
3985c66b693SKris Buschelman    Output Parameters:
3995c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
4005c66b693SKris Buschelman 
4015c66b693SKris Buschelman    Notes:
4025c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
4035c66b693SKris Buschelman 
4045c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
4055c66b693SKris Buschelman 
4065c66b693SKris Buschelman    Level: intermediate
4075c66b693SKris Buschelman 
4085c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
4095c66b693SKris Buschelman @*/
4105c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
4115c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
4125c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
4135c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
4145c66b693SKris Buschelman   int ierr;
4154d3841fdSKris Buschelman   char numfunct[80];
4165c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
4175c66b693SKris Buschelman 
4185c66b693SKris Buschelman   PetscFunctionBegin;
4195c66b693SKris Buschelman 
4204482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
421c9780b6fSBarry Smith   PetscValidType(A,1);
4225c66b693SKris Buschelman   MatPreallocated(A);
4235c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4245c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4255c66b693SKris Buschelman 
4264482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
427c9780b6fSBarry Smith   PetscValidType(B,2);
4285c66b693SKris Buschelman   MatPreallocated(B);
4295c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4305c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4315c66b693SKris Buschelman 
4324482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
433c9780b6fSBarry Smith   PetscValidType(C,3);
4345c66b693SKris Buschelman   MatPreallocated(C);
4355c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4365c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4375c66b693SKris Buschelman 
4385c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
4395c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
4405c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
4415c66b693SKris Buschelman 
4424d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
4434d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
4444d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
4454d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4464d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
4474d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4484d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
4494d3841fdSKris Buschelman 
4505c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
4515c66b693SKris Buschelman 
4525c66b693SKris Buschelman   PetscFunctionReturn(0);
4535c66b693SKris Buschelman }
4545c66b693SKris Buschelman 
455*d6bb3c2dSHong Zhang /* This routine is called ONLY in the case of reusing previously computed symbolic C */
456d50806bdSBarry Smith #undef __FUNCT__
45726be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_MPIAIJ_MPIAIJ"
45826be0446SHong Zhang int MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat B,Mat C)
45926be0446SHong Zhang {
460*d6bb3c2dSHong Zhang   int               ierr;
461*d6bb3c2dSHong Zhang   Mat_MatMatMultMPI *mult=(Mat_MatMatMultMPI*)C->spptr;
462*d6bb3c2dSHong Zhang 
46326be0446SHong Zhang   PetscFunctionBegin;
464*d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(B,1,&mult->isrowb,&mult->iscolb,MAT_REUSE_MATRIX,&mult->bseq);CHKERRQ(ierr)
465*d6bb3c2dSHong Zhang   ierr = MatGetSubMatrices(A,1,&mult->isrowa,&mult->isrowb,MAT_REUSE_MATRIX,&mult->aseq);CHKERRQ(ierr);
466*d6bb3c2dSHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(mult->aseq[0],mult->bseq[0],MAT_REUSE_MATRIX,0.0,&mult->C_seq);CHKERRQ(ierr);
467*d6bb3c2dSHong Zhang 
468*d6bb3c2dSHong Zhang   ierr = PetscObjectReference((PetscObject)mult->C_seq);CHKERRQ(ierr);
469*d6bb3c2dSHong Zhang   ierr = MatMerge(A->comm,mult->C_seq,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
470*d6bb3c2dSHong Zhang 
47126be0446SHong Zhang   PetscFunctionReturn(0);
47226be0446SHong Zhang }
47326be0446SHong Zhang 
47426be0446SHong Zhang #undef __FUNCT__
47526be0446SHong Zhang #define __FUNCT__ "MatMatMultNumeric_SeqAIJ_SeqAIJ"
47626be0446SHong Zhang int MatMatMultNumeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
477d50806bdSBarry Smith {
47894e3eecaSKris Buschelman   int        ierr,flops=0;
479d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
480d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
481d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
482d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4835c66b693SKris Buschelman   int        am=A->M,cn=C->N;
48494e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
485d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
486d50806bdSBarry Smith 
487d50806bdSBarry Smith   PetscFunctionBegin;
488d50806bdSBarry Smith 
4895c66b693SKris Buschelman   /* Start timers */
490d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
49194e3eecaSKris Buschelman 
492d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
493d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
494d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
495d50806bdSBarry Smith   /* Traverse A row-wise. */
496d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
497d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
498d50806bdSBarry Smith   for (i=0;i<am;i++) {
499d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
500d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
501d50806bdSBarry Smith       brow = *aj++;
502d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
503d50806bdSBarry Smith       bjj  = bj + bi[brow];
504d50806bdSBarry Smith       baj  = ba + bi[brow];
505d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
506d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
507d50806bdSBarry Smith       }
508d50806bdSBarry Smith       flops += 2*bnzi;
509d50806bdSBarry Smith       aa++;
510d50806bdSBarry Smith     }
511d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
512d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
513d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
514d50806bdSBarry Smith       ca[j] = temp[cj[j]];
515d50806bdSBarry Smith       temp[cj[j]] = 0.0;
516d50806bdSBarry Smith     }
517d50806bdSBarry Smith     ca += cnzi;
518d50806bdSBarry Smith     cj += cnzi;
519d50806bdSBarry Smith   }
520716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
521716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522716bacf3SKris Buschelman 
523d50806bdSBarry Smith   /* Free temp */
524d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
525d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
526d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
527d50806bdSBarry Smith   PetscFunctionReturn(0);
528d50806bdSBarry Smith }
529d50806bdSBarry Smith 
530d50806bdSBarry Smith #undef __FUNCT__
5315c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
5325c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
533d50806bdSBarry Smith   int ierr;
534d50806bdSBarry Smith 
535d50806bdSBarry Smith   PetscFunctionBegin;
5365c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
53726be0446SHong Zhang     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMultSymbolic",MAT_COOKIE);CHKERRQ(ierr);
538d50806bdSBarry Smith   }
5395c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
54026be0446SHong Zhang     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMultNumeric",MAT_COOKIE);CHKERRQ(ierr);
54194e3eecaSKris Buschelman   }
542*d6bb3c2dSHong Zhang 
54394e3eecaSKris Buschelman   PetscFunctionReturn(0);
54494e3eecaSKris Buschelman }
545