xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 6284ec503f7d48a443257752306f1a524cdcb213)
1d50806bdSBarry Smith /*
22c9ce0e5SKris Buschelman   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
8d50806bdSBarry Smith 
91c239cc6SHong Zhang /*
101c239cc6SHong Zhang   Add a index set into a sorted linked list
111c239cc6SHong Zhang   input:
121c239cc6SHong Zhang     nidx      - number of input indices
131c239cc6SHong Zhang     indices   - interger array
141c239cc6SHong Zhang     idx_head  - the header of the list
151c239cc6SHong Zhang     idx_unset - the value indicating the entry in the list is not set yet
161c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
171c239cc6SHong Zhang   output:
181c239cc6SHong Zhang     nlnk      - number of newly added indices
191c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
201c239cc6SHong Zhang  */
211c239cc6SHong Zhang #undef __FUNCT__
221c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd"
231c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
241c239cc6SHong Zhang {
251c239cc6SHong Zhang   int i,idx,lidx,entry,n;
261c239cc6SHong Zhang 
271c239cc6SHong Zhang   PetscFunctionBegin;
281c239cc6SHong Zhang   n    = 0;
291c239cc6SHong Zhang   lidx = idx_head;
301c239cc6SHong Zhang   i    = nidx;
311c239cc6SHong Zhang   while (i){
321c239cc6SHong Zhang     /* assume indices is almost in increasing order, starting from its end saves computation */
331c239cc6SHong Zhang     entry = indices[--i];
341c239cc6SHong Zhang     if (lnk[entry] == idx_unset) { /* new entry */
351c239cc6SHong Zhang       do {
361c239cc6SHong Zhang         idx = lidx;
371c239cc6SHong Zhang         lidx  = lnk[idx];
381c239cc6SHong Zhang       } while (entry > lidx);
391c239cc6SHong Zhang       lnk[idx] = entry;
401c239cc6SHong Zhang       lnk[entry] = lidx;
411c239cc6SHong Zhang       n++;
421c239cc6SHong Zhang     }
431c239cc6SHong Zhang   }
441c239cc6SHong Zhang   *nlnk = n;
451c239cc6SHong Zhang   PetscFunctionReturn(0);
461c239cc6SHong Zhang }
471c239cc6SHong Zhang 
481c239cc6SHong Zhang 
492216b3a4SKris Buschelman static int logkey_matmatmult          = 0;
502216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
512216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
522216b3a4SKris Buschelman 
53c1f4806aSKris Buschelman #undef __FUNCT__
54c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
555c66b693SKris Buschelman /*@
565c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
5794e3eecaSKris Buschelman 
585c66b693SKris Buschelman    Collective on Mat
59d50806bdSBarry Smith 
605c66b693SKris Buschelman    Input Parameters:
615c66b693SKris Buschelman +  A - the left matrix
625c66b693SKris Buschelman -  B - the right matrix
635c66b693SKris Buschelman 
645c66b693SKris Buschelman    Output Parameters:
655c66b693SKris Buschelman .  C - the product matrix
665c66b693SKris Buschelman 
675c66b693SKris Buschelman    Notes:
685c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
695c66b693SKris Buschelman 
704d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
714d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
725c66b693SKris Buschelman 
735c66b693SKris Buschelman    Level: intermediate
745c66b693SKris Buschelman 
755c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
765c66b693SKris Buschelman @*/
775c66b693SKris Buschelman int MatMatMult(Mat A,Mat B, Mat *C) {
785c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
795c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
805c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */
81d50806bdSBarry Smith   int  ierr;
82*6284ec50SHong Zhang #ifdef OLD
835c66b693SKris Buschelman   char funct[80];
845c66b693SKris Buschelman   int  (*mult)(Mat,Mat,Mat*);
85*6284ec50SHong Zhang #endif
86d50806bdSBarry Smith   PetscFunctionBegin;
874482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
88c9780b6fSBarry Smith   PetscValidType(A,1);
895c66b693SKris Buschelman   MatPreallocated(A);
905c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
915c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
92d50806bdSBarry Smith 
934482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
94c9780b6fSBarry Smith   PetscValidType(B,2);
955c66b693SKris Buschelman   MatPreallocated(B);
965c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
975c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
98d50806bdSBarry Smith 
994482741eSBarry Smith   PetscValidPointer(C,3);
1004482741eSBarry Smith 
1015c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
102d50806bdSBarry Smith 
103*6284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
104*6284ec50SHong Zhang   ierr = (*A->ops->matmult)(A,B,C);CHKERRQ(ierr);
105*6284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1064d3841fdSKris Buschelman 
107*6284ec50SHong Zhang   PetscFunctionReturn(0);
108*6284ec50SHong Zhang }
109*6284ec50SHong Zhang 
110*6284ec50SHong Zhang #undef __FUNCT__
111*6284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
112*6284ec50SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B, Mat *C) {
113*6284ec50SHong Zhang   int ierr;
114*6284ec50SHong Zhang 
115*6284ec50SHong Zhang   PetscFunctionBegin;
116*6284ec50SHong Zhang   SETERRQ(PETSC_ERR_SUP,"Not written yet");
117*6284ec50SHong Zhang 
118d50806bdSBarry Smith   PetscFunctionReturn(0);
119d50806bdSBarry Smith }
1205c66b693SKris Buschelman 
1215c66b693SKris Buschelman #undef __FUNCT__
1225c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
1235c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) {
1245c66b693SKris Buschelman   int ierr;
1254d3841fdSKris Buschelman   char symfunct[80],numfunct[80];
1265c66b693SKris Buschelman   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
1275c66b693SKris Buschelman 
1285c66b693SKris Buschelman   PetscFunctionBegin;
1294d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1304d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1314d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
1324d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1334d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1345c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1354d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1364d3841fdSKris Buschelman 
1374d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
1385c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
1394d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1404d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
1414d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1424d3841fdSKris Buschelman 
1435c66b693SKris Buschelman   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
1445c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
1455c66b693SKris Buschelman   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
1465c66b693SKris Buschelman   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
1475c66b693SKris Buschelman   PetscFunctionReturn(0);
1485c66b693SKris Buschelman }
1495c66b693SKris Buschelman 
150c1f4806aSKris Buschelman #undef __FUNCT__
151c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
1525c66b693SKris Buschelman /*@
1535c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
1545c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
1555c66b693SKris Buschelman 
1565c66b693SKris Buschelman    Collective on Mat
1575c66b693SKris Buschelman 
1585c66b693SKris Buschelman    Input Parameters:
1595c66b693SKris Buschelman +  A - the left matrix
1605c66b693SKris Buschelman -  B - the right matrix
1615c66b693SKris Buschelman 
1625c66b693SKris Buschelman    Output Parameters:
1635c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
1645c66b693SKris Buschelman 
1655c66b693SKris Buschelman    Notes:
1664d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
1675c66b693SKris Buschelman 
1684d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
1695c66b693SKris Buschelman 
1705c66b693SKris Buschelman    Level: intermediate
1715c66b693SKris Buschelman 
1725c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
1735c66b693SKris Buschelman @*/
1745c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
1755c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
1765c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
1775c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
1785c66b693SKris Buschelman   int  ierr;
1794d3841fdSKris Buschelman   char symfunct[80];
1805c66b693SKris Buschelman   int  (*symbolic)(Mat,Mat,Mat *);
1815c66b693SKris Buschelman 
1825c66b693SKris Buschelman   PetscFunctionBegin;
1834482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
184c9780b6fSBarry Smith   PetscValidType(A,1);
1855c66b693SKris Buschelman   MatPreallocated(A);
1865c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1875c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1885c66b693SKris Buschelman 
1894482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
190c9780b6fSBarry Smith   PetscValidType(B,2);
1915c66b693SKris Buschelman   MatPreallocated(B);
1925c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1935c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1944482741eSBarry Smith   PetscValidPointer(C,3);
1954482741eSBarry Smith 
1965c66b693SKris Buschelman 
1975c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
1985c66b693SKris Buschelman 
1994d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2004d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2014d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2024d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2034d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2044d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2054d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2065c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2075c66b693SKris Buschelman 
2085c66b693SKris Buschelman   PetscFunctionReturn(0);
2095c66b693SKris Buschelman }
21058c24d83SHong Zhang 
21158c24d83SHong Zhang #undef __FUNCT__
21258c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
21358c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
21458c24d83SHong Zhang {
21558c24d83SHong Zhang   int            ierr;
21658c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
21758c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
21858c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
2191c239cc6SHong Zhang   int            *ci,*cj,*lnk,idx0,idx;
2205c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
221bf812060SBarry Smith   int            i,j,anzi,brow,bnzj,cnzi,nlnk;
22258c24d83SHong Zhang   MatScalar      *ca;
22358c24d83SHong Zhang 
22458c24d83SHong Zhang   PetscFunctionBegin;
2255c66b693SKris Buschelman   /* Start timers */
22658c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
22758c24d83SHong Zhang   /* Set up */
22858c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
22958c24d83SHong Zhang   /* free space for accumulating nonzero column info */
23058c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
23158c24d83SHong Zhang   ci[0] = 0;
23258c24d83SHong Zhang 
23358c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
23458c24d83SHong Zhang   for (i=0; i<bn; i++) lnk[i] = -1;
23558c24d83SHong Zhang 
23658c24d83SHong Zhang   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
23758c24d83SHong Zhang   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
23858c24d83SHong Zhang   current_space = free_space;
23958c24d83SHong Zhang 
24058c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
24158c24d83SHong Zhang   for (i=0;i<am;i++) {
24258c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
24358c24d83SHong Zhang     cnzi = 0;
24458c24d83SHong Zhang     lnk[bn] = bn;
24558c24d83SHong Zhang     for (j=0;j<anzi;j++) {
24658c24d83SHong Zhang       brow = *aj++;
24758c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
24858c24d83SHong Zhang       bjj  = bj + bi[brow];
2491c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
2501c239cc6SHong Zhang       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
2511c239cc6SHong Zhang       cnzi += nlnk;
25258c24d83SHong Zhang     }
25358c24d83SHong Zhang 
25458c24d83SHong Zhang     /* If free space is not available, make more free space */
25558c24d83SHong Zhang     /* Double the amount of total space in the list */
25658c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
25758c24d83SHong Zhang       printf("...%d -th row, double space ...\n",i);
25858c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
25958c24d83SHong Zhang     }
26058c24d83SHong Zhang 
26158c24d83SHong Zhang     /* Copy data into free space, and zero out denserow and lnk */
26258c24d83SHong Zhang     idx = bn;
26358c24d83SHong Zhang     for (j=0; j<cnzi; j++){
26458c24d83SHong Zhang       idx0 = idx;
26558c24d83SHong Zhang       idx  = lnk[idx0];
26658c24d83SHong Zhang       *current_space->array++ = idx;
26758c24d83SHong Zhang       lnk[idx0] = -1;
26858c24d83SHong Zhang     }
26958c24d83SHong Zhang     lnk[idx] = -1;
27058c24d83SHong Zhang 
27158c24d83SHong Zhang     current_space->local_used      += cnzi;
27258c24d83SHong Zhang     current_space->local_remaining -= cnzi;
27358c24d83SHong Zhang 
27458c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
27558c24d83SHong Zhang   }
27658c24d83SHong Zhang 
27758c24d83SHong Zhang   /* Column indices are in the list of free space */
27858c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
27958c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
28058c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
28158c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
28258c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
28358c24d83SHong Zhang 
28458c24d83SHong Zhang   /* Allocate space for ca */
28558c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
28658c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
28758c24d83SHong Zhang 
28858c24d83SHong Zhang   /* put together the new matrix */
28958c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
29058c24d83SHong Zhang 
29158c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
29258c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
29358c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
29458c24d83SHong Zhang   c->freedata = PETSC_TRUE;
29558c24d83SHong Zhang   c->nonew    = 0;
29658c24d83SHong Zhang 
29758c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
29858c24d83SHong Zhang   PetscFunctionReturn(0);
29958c24d83SHong Zhang }
300d50806bdSBarry Smith 
301c1f4806aSKris Buschelman #undef __FUNCT__
302c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3035c66b693SKris Buschelman /*@
3045c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3055c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3065c66b693SKris Buschelman 
3075c66b693SKris Buschelman    Collective on Mat
3085c66b693SKris Buschelman 
3095c66b693SKris Buschelman    Input Parameters:
3105c66b693SKris Buschelman +  A - the left matrix
3115c66b693SKris Buschelman -  B - the right matrix
3125c66b693SKris Buschelman 
3135c66b693SKris Buschelman    Output Parameters:
3145c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3155c66b693SKris Buschelman 
3165c66b693SKris Buschelman    Notes:
3175c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3185c66b693SKris Buschelman 
3195c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3205c66b693SKris Buschelman 
3215c66b693SKris Buschelman    Level: intermediate
3225c66b693SKris Buschelman 
3235c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3245c66b693SKris Buschelman @*/
3255c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
3265c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
3275c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
3285c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
3295c66b693SKris Buschelman   int ierr;
3304d3841fdSKris Buschelman   char numfunct[80];
3315c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
3325c66b693SKris Buschelman 
3335c66b693SKris Buschelman   PetscFunctionBegin;
3345c66b693SKris Buschelman 
3354482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
336c9780b6fSBarry Smith   PetscValidType(A,1);
3375c66b693SKris Buschelman   MatPreallocated(A);
3385c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3395c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3405c66b693SKris Buschelman 
3414482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
342c9780b6fSBarry Smith   PetscValidType(B,2);
3435c66b693SKris Buschelman   MatPreallocated(B);
3445c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3455c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3465c66b693SKris Buschelman 
3474482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
348c9780b6fSBarry Smith   PetscValidType(C,3);
3495c66b693SKris Buschelman   MatPreallocated(C);
3505c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
3515c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3525c66b693SKris Buschelman 
3535c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
3545c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
3555c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
3565c66b693SKris Buschelman 
3574d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
3584d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
3594d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
3604d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
3614d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
3624d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
3634d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
3644d3841fdSKris Buschelman 
3655c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
3665c66b693SKris Buschelman 
3675c66b693SKris Buschelman   PetscFunctionReturn(0);
3685c66b693SKris Buschelman }
3695c66b693SKris Buschelman 
370d50806bdSBarry Smith #undef __FUNCT__
37194e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
37294e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
373d50806bdSBarry Smith {
37494e3eecaSKris Buschelman   int        ierr,flops=0;
375d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
376d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
377d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
378d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
3795c66b693SKris Buschelman   int        am=A->M,cn=C->N;
38094e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
381d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
382d50806bdSBarry Smith 
383d50806bdSBarry Smith   PetscFunctionBegin;
384d50806bdSBarry Smith 
3855c66b693SKris Buschelman   /* Start timers */
386d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
38794e3eecaSKris Buschelman 
388d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
389d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
390d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
391d50806bdSBarry Smith   /* Traverse A row-wise. */
392d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
393d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
394d50806bdSBarry Smith   for (i=0;i<am;i++) {
395d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
396d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
397d50806bdSBarry Smith       brow = *aj++;
398d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
399d50806bdSBarry Smith       bjj  = bj + bi[brow];
400d50806bdSBarry Smith       baj  = ba + bi[brow];
401d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
402d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
403d50806bdSBarry Smith       }
404d50806bdSBarry Smith       flops += 2*bnzi;
405d50806bdSBarry Smith       aa++;
406d50806bdSBarry Smith     }
407d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
408d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
409d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
410d50806bdSBarry Smith       ca[j] = temp[cj[j]];
411d50806bdSBarry Smith       temp[cj[j]] = 0.0;
412d50806bdSBarry Smith     }
413d50806bdSBarry Smith     ca += cnzi;
414d50806bdSBarry Smith     cj += cnzi;
415d50806bdSBarry Smith   }
416716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
418716bacf3SKris Buschelman 
419d50806bdSBarry Smith   /* Free temp */
420d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
421d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
422d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
423d50806bdSBarry Smith   PetscFunctionReturn(0);
424d50806bdSBarry Smith }
425d50806bdSBarry Smith 
426d50806bdSBarry Smith #undef __FUNCT__
4275c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
4285c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
429d50806bdSBarry Smith   int ierr;
430d50806bdSBarry Smith 
431d50806bdSBarry Smith   PetscFunctionBegin;
432*6284ec50SHong Zhang #ifdef OLD
4332216b3a4SKris Buschelman   if (!logkey_matmatmult) {
4342216b3a4SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
4352216b3a4SKris Buschelman   }
4365c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij",
4375c66b693SKris Buschelman                                            "MatMatMult_SeqAIJ_SeqAIJ",
4385c66b693SKris Buschelman                                            MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
439*6284ec50SHong Zhang #endif
4405c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
4415c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
442d50806bdSBarry Smith   }
4435c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
4445c66b693SKris Buschelman                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
4455c66b693SKris Buschelman                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
4465c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
4475c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
44894e3eecaSKris Buschelman   }
4495c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
4505c66b693SKris Buschelman                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
4515c66b693SKris Buschelman                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
45294e3eecaSKris Buschelman   PetscFunctionReturn(0);
45394e3eecaSKris Buschelman }
454