xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 1c239cc6be46bcf498b8411b7fbe1d350156fca8)
1d50806bdSBarry Smith /*
22c9ce0e5SKris Buschelman   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
8d50806bdSBarry Smith 
9*1c239cc6SHong Zhang /*
10*1c239cc6SHong Zhang   Add a index set into a sorted linked list
11*1c239cc6SHong Zhang   input:
12*1c239cc6SHong Zhang     nidx      - number of input indices
13*1c239cc6SHong Zhang     indices   - interger array
14*1c239cc6SHong Zhang     idx_head  - the header of the list
15*1c239cc6SHong Zhang     idx_unset - the value indicating the entry in the list is not set yet
16*1c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
17*1c239cc6SHong Zhang   output:
18*1c239cc6SHong Zhang     nlnk      - number of newly added indices
19*1c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
20*1c239cc6SHong Zhang  */
21*1c239cc6SHong Zhang #undef __FUNCT__
22*1c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd"
23*1c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
24*1c239cc6SHong Zhang {
25*1c239cc6SHong Zhang   int i,idx,lidx,entry,n;
26*1c239cc6SHong Zhang 
27*1c239cc6SHong Zhang   PetscFunctionBegin;
28*1c239cc6SHong Zhang   n    = 0;
29*1c239cc6SHong Zhang   lidx = idx_head;
30*1c239cc6SHong Zhang   i    = nidx;
31*1c239cc6SHong Zhang   while (i){
32*1c239cc6SHong Zhang     /* assume indices is almost in increasing order, starting from its end saves computation */
33*1c239cc6SHong Zhang     entry = indices[--i];
34*1c239cc6SHong Zhang     if (lnk[entry] == idx_unset) { /* new entry */
35*1c239cc6SHong Zhang       do {
36*1c239cc6SHong Zhang         idx = lidx;
37*1c239cc6SHong Zhang         lidx  = lnk[idx];
38*1c239cc6SHong Zhang       } while (entry > lidx);
39*1c239cc6SHong Zhang       lnk[idx] = entry;
40*1c239cc6SHong Zhang       lnk[entry] = lidx;
41*1c239cc6SHong Zhang       n++;
42*1c239cc6SHong Zhang     }
43*1c239cc6SHong Zhang   }
44*1c239cc6SHong Zhang   *nlnk = n;
45*1c239cc6SHong Zhang   PetscFunctionReturn(0);
46*1c239cc6SHong Zhang }
47*1c239cc6SHong Zhang 
48*1c239cc6SHong Zhang 
492216b3a4SKris Buschelman static int logkey_matmatmult          = 0;
502216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
512216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
522216b3a4SKris Buschelman 
53c1f4806aSKris Buschelman #undef __FUNCT__
54c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
555c66b693SKris Buschelman /*@
565c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
5794e3eecaSKris Buschelman 
585c66b693SKris Buschelman    Collective on Mat
59d50806bdSBarry Smith 
605c66b693SKris Buschelman    Input Parameters:
615c66b693SKris Buschelman +  A - the left matrix
625c66b693SKris Buschelman -  B - the right matrix
635c66b693SKris Buschelman 
645c66b693SKris Buschelman    Output Parameters:
655c66b693SKris Buschelman .  C - the product matrix
665c66b693SKris Buschelman 
675c66b693SKris Buschelman    Notes:
685c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
695c66b693SKris Buschelman 
704d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
714d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
725c66b693SKris Buschelman 
735c66b693SKris Buschelman    Level: intermediate
745c66b693SKris Buschelman 
755c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
765c66b693SKris Buschelman @*/
775c66b693SKris Buschelman int MatMatMult(Mat A,Mat B, Mat *C) {
785c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
795c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
805c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */
81d50806bdSBarry Smith   int  ierr;
825c66b693SKris Buschelman   char funct[80];
835c66b693SKris Buschelman   int  (*mult)(Mat,Mat,Mat*);
84d50806bdSBarry Smith 
85d50806bdSBarry Smith   PetscFunctionBegin;
864482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
87c9780b6fSBarry Smith   PetscValidType(A,1);
885c66b693SKris Buschelman   MatPreallocated(A);
895c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
905c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
91d50806bdSBarry Smith 
924482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
93c9780b6fSBarry Smith   PetscValidType(B,2);
945c66b693SKris Buschelman   MatPreallocated(B);
955c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
965c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
97d50806bdSBarry Smith 
984482741eSBarry Smith   PetscValidPointer(C,3);
994482741eSBarry Smith 
1005c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
101d50806bdSBarry Smith 
1024d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1034d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1044d3841fdSKris Buschelman   ierr = PetscStrcpy(funct,"MatMatMult_seqaijseqaij");CHKERRQ(ierr);
1054d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr);
1064d3841fdSKris Buschelman   if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1075c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr);
1084d3841fdSKris Buschelman   if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1094d3841fdSKris Buschelman 
1105c66b693SKris Buschelman   ierr = (*mult)(A,B,C);CHKERRQ(ierr);
111d50806bdSBarry Smith   PetscFunctionReturn(0);
112d50806bdSBarry Smith }
1135c66b693SKris Buschelman 
1145c66b693SKris Buschelman #undef __FUNCT__
1155c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
1165c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) {
1175c66b693SKris Buschelman   int ierr;
1184d3841fdSKris Buschelman   char symfunct[80],numfunct[80];
1195c66b693SKris Buschelman   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
1205c66b693SKris Buschelman 
1215c66b693SKris Buschelman   PetscFunctionBegin;
1224d3841fdSKris Buschelman 
1234d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1244d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1254d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
1264d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1274d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1285c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1294d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1304d3841fdSKris Buschelman 
1314d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
1325c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
1334d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1344d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
1354d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1364d3841fdSKris Buschelman 
1375c66b693SKris Buschelman   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
1385c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
1395c66b693SKris Buschelman   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
1405c66b693SKris Buschelman   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
1415c66b693SKris Buschelman   PetscFunctionReturn(0);
1425c66b693SKris Buschelman }
1435c66b693SKris Buschelman 
144c1f4806aSKris Buschelman #undef __FUNCT__
145c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1465c66b693SKris Buschelman /*@
1475c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1485c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1495c66b693SKris Buschelman 
1505c66b693SKris Buschelman    Collective on Mat
1515c66b693SKris Buschelman 
1525c66b693SKris Buschelman    Input Parameters:
1535c66b693SKris Buschelman +  A - the left matrix
1545c66b693SKris Buschelman -  B - the right matrix
1555c66b693SKris Buschelman 
1565c66b693SKris Buschelman    Output Parameters:
1575c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1585c66b693SKris Buschelman 
1595c66b693SKris Buschelman    Notes:
1604d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1615c66b693SKris Buschelman 
1624d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1635c66b693SKris Buschelman 
1645c66b693SKris Buschelman    Level: intermediate
1655c66b693SKris Buschelman 
1665c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1675c66b693SKris Buschelman @*/
1685c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
1695c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
1705c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
1715c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
1725c66b693SKris Buschelman   int  ierr;
1734d3841fdSKris Buschelman   char symfunct[80];
1745c66b693SKris Buschelman   int  (*symbolic)(Mat,Mat,Mat *);
1755c66b693SKris Buschelman 
1765c66b693SKris Buschelman   PetscFunctionBegin;
1774482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
178c9780b6fSBarry Smith   PetscValidType(A,1);
1795c66b693SKris Buschelman   MatPreallocated(A);
1805c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1815c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1825c66b693SKris Buschelman 
1834482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
184c9780b6fSBarry Smith   PetscValidType(B,2);
1855c66b693SKris Buschelman   MatPreallocated(B);
1865c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1875c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1884482741eSBarry Smith   PetscValidPointer(C,3);
1894482741eSBarry Smith 
1905c66b693SKris Buschelman 
1915c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1925c66b693SKris Buschelman 
1934d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1944d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1954d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
1964d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1974d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1984d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1994d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2005c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2015c66b693SKris Buschelman 
2025c66b693SKris Buschelman   PetscFunctionReturn(0);
2035c66b693SKris Buschelman }
20458c24d83SHong Zhang 
20558c24d83SHong Zhang #undef __FUNCT__
20658c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
20758c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
20858c24d83SHong Zhang {
20958c24d83SHong Zhang   int            ierr;
21058c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
21158c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
21258c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
213*1c239cc6SHong Zhang   int            *ci,*cj,*lnk,idx0,idx;
2145c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
215*1c239cc6SHong Zhang   int            i,j,k,anzi,brow,bnzj,cnzi,nlnk;
21658c24d83SHong Zhang   MatScalar      *ca;
21758c24d83SHong Zhang 
21858c24d83SHong Zhang   PetscFunctionBegin;
2195c66b693SKris Buschelman   /* Start timers */
22058c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
22158c24d83SHong Zhang 
22258c24d83SHong Zhang   /* Set up */
22358c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
22458c24d83SHong Zhang   /* free space for accumulating nonzero column info */
22558c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
22658c24d83SHong Zhang   ci[0] = 0;
22758c24d83SHong Zhang 
22858c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
22958c24d83SHong Zhang   for (i=0; i<bn; i++) lnk[i] = -1;
23058c24d83SHong Zhang 
23158c24d83SHong Zhang   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
23258c24d83SHong Zhang   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
23358c24d83SHong Zhang   current_space = free_space;
23458c24d83SHong Zhang 
23558c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
23658c24d83SHong Zhang   for (i=0;i<am;i++) {
23758c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
23858c24d83SHong Zhang     cnzi = 0;
23958c24d83SHong Zhang     lnk[bn] = bn;
24058c24d83SHong Zhang     for (j=0;j<anzi;j++) {
24158c24d83SHong Zhang       brow = *aj++;
24258c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
24358c24d83SHong Zhang       bjj  = bj + bi[brow];
244*1c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
245*1c239cc6SHong Zhang       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
246*1c239cc6SHong Zhang       cnzi += nlnk;
24758c24d83SHong Zhang     }
24858c24d83SHong Zhang 
24958c24d83SHong Zhang     /* If free space is not available, make more free space */
25058c24d83SHong Zhang     /* Double the amount of total space in the list */
25158c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
25258c24d83SHong Zhang       printf("...%d -th row, double space ...\n",i);
25358c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
25458c24d83SHong Zhang     }
25558c24d83SHong Zhang 
25658c24d83SHong Zhang     /* Copy data into free space, and zero out denserow and lnk */
25758c24d83SHong Zhang     idx = bn;
25858c24d83SHong Zhang     for (j=0; j<cnzi; j++){
25958c24d83SHong Zhang       idx0 = idx;
26058c24d83SHong Zhang       idx  = lnk[idx0];
26158c24d83SHong Zhang       *current_space->array++ = idx;
26258c24d83SHong Zhang       lnk[idx0] = -1;
26358c24d83SHong Zhang     }
26458c24d83SHong Zhang     lnk[idx] = -1;
26558c24d83SHong Zhang 
26658c24d83SHong Zhang     current_space->local_used      += cnzi;
26758c24d83SHong Zhang     current_space->local_remaining -= cnzi;
26858c24d83SHong Zhang 
26958c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
27058c24d83SHong Zhang   }
27158c24d83SHong Zhang 
27258c24d83SHong Zhang   /* Column indices are in the list of free space */
27358c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
27458c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
27558c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
27658c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
27758c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
27858c24d83SHong Zhang 
27958c24d83SHong Zhang   /* Allocate space for ca */
28058c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
28158c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
28258c24d83SHong Zhang 
28358c24d83SHong Zhang   /* put together the new matrix */
28458c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
28558c24d83SHong Zhang 
28658c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
28758c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
28858c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
28958c24d83SHong Zhang   c->freedata = PETSC_TRUE;
29058c24d83SHong Zhang   c->nonew    = 0;
29158c24d83SHong Zhang 
29258c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
29358c24d83SHong Zhang   PetscFunctionReturn(0);
29458c24d83SHong Zhang }
295d50806bdSBarry Smith 
296c1f4806aSKris Buschelman #undef __FUNCT__
297c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
2985c66b693SKris Buschelman /*@
2995c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3005c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3015c66b693SKris Buschelman 
3025c66b693SKris Buschelman    Collective on Mat
3035c66b693SKris Buschelman 
3045c66b693SKris Buschelman    Input Parameters:
3055c66b693SKris Buschelman +  A - the left matrix
3065c66b693SKris Buschelman -  B - the right matrix
3075c66b693SKris Buschelman 
3085c66b693SKris Buschelman    Output Parameters:
3095c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3105c66b693SKris Buschelman 
3115c66b693SKris Buschelman    Notes:
3125c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3135c66b693SKris Buschelman 
3145c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3155c66b693SKris Buschelman 
3165c66b693SKris Buschelman    Level: intermediate
3175c66b693SKris Buschelman 
3185c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3195c66b693SKris Buschelman @*/
3205c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
3215c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
3225c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
3235c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
3245c66b693SKris Buschelman   int ierr;
3254d3841fdSKris Buschelman   char numfunct[80];
3265c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
3275c66b693SKris Buschelman 
3285c66b693SKris Buschelman   PetscFunctionBegin;
3295c66b693SKris Buschelman 
3304482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
331c9780b6fSBarry Smith   PetscValidType(A,1);
3325c66b693SKris Buschelman   MatPreallocated(A);
3335c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3345c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3355c66b693SKris Buschelman 
3364482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
337c9780b6fSBarry Smith   PetscValidType(B,2);
3385c66b693SKris Buschelman   MatPreallocated(B);
3395c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3405c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3415c66b693SKris Buschelman 
3424482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
343c9780b6fSBarry Smith   PetscValidType(C,3);
3445c66b693SKris Buschelman   MatPreallocated(C);
3455c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3465c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3475c66b693SKris Buschelman 
3485c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3495c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3505c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3515c66b693SKris Buschelman 
3524d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
3534d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
3544d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
3554d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
3564d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
3574d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
3584d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
3594d3841fdSKris Buschelman 
3605c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
3615c66b693SKris Buschelman 
3625c66b693SKris Buschelman   PetscFunctionReturn(0);
3635c66b693SKris Buschelman }
3645c66b693SKris Buschelman 
365d50806bdSBarry Smith #undef __FUNCT__
36694e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
36794e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
368d50806bdSBarry Smith {
36994e3eecaSKris Buschelman   int        ierr,flops=0;
370d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
371d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
372d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
373d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
3745c66b693SKris Buschelman   int        am=A->M,cn=C->N;
37594e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
376d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
377d50806bdSBarry Smith 
378d50806bdSBarry Smith   PetscFunctionBegin;
379d50806bdSBarry Smith 
3805c66b693SKris Buschelman   /* Start timers */
381d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
38294e3eecaSKris Buschelman 
383d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
384d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
385d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
386d50806bdSBarry Smith   /* Traverse A row-wise. */
387d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
388d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
389d50806bdSBarry Smith   for (i=0;i<am;i++) {
390d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
391d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
392d50806bdSBarry Smith       brow = *aj++;
393d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
394d50806bdSBarry Smith       bjj  = bj + bi[brow];
395d50806bdSBarry Smith       baj  = ba + bi[brow];
396d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
397d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
398d50806bdSBarry Smith       }
399d50806bdSBarry Smith       flops += 2*bnzi;
400d50806bdSBarry Smith       aa++;
401d50806bdSBarry Smith     }
402d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
403d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
404d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
405d50806bdSBarry Smith       ca[j] = temp[cj[j]];
406d50806bdSBarry Smith       temp[cj[j]] = 0.0;
407d50806bdSBarry Smith     }
408d50806bdSBarry Smith     ca += cnzi;
409d50806bdSBarry Smith     cj += cnzi;
410d50806bdSBarry Smith   }
411716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
412716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
413716bacf3SKris Buschelman 
414d50806bdSBarry Smith   /* Free temp */
415d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
416d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
417d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
418d50806bdSBarry Smith   PetscFunctionReturn(0);
419d50806bdSBarry Smith }
420d50806bdSBarry Smith 
421d50806bdSBarry Smith #undef __FUNCT__
4225c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
4235c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
424d50806bdSBarry Smith   int ierr;
425d50806bdSBarry Smith 
426d50806bdSBarry Smith   PetscFunctionBegin;
4272216b3a4SKris Buschelman   if (!logkey_matmatmult) {
4282216b3a4SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
4292216b3a4SKris Buschelman   }
4305c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij",
4315c66b693SKris Buschelman                                            "MatMatMult_SeqAIJ_SeqAIJ",
4325c66b693SKris Buschelman                                            MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
4335c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
4345c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
435d50806bdSBarry Smith   }
4365c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
4375c66b693SKris Buschelman                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
4385c66b693SKris Buschelman                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
4395c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
4405c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
44194e3eecaSKris Buschelman   }
4425c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
4435c66b693SKris Buschelman                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
4445c66b693SKris Buschelman                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
44594e3eecaSKris Buschelman   PetscFunctionReturn(0);
44694e3eecaSKris Buschelman }
447