xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision c5db241ff33b0aa6c523a2fe9ff5ac1a82e78f20)
1d50806bdSBarry Smith /*
2a50f8bf6SHong Zhang   Defines matrix-matrix product routines for pairs of AIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9d50806bdSBarry Smith 
101c239cc6SHong Zhang /*
11*c5db241fSHong Zhang   Initialize a linked list
12*c5db241fSHong Zhang   Input Parameters:
13*c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
14*c5db241fSHong Zhang     nlnk      - max length of the list
15*c5db241fSHong Zhang     lnk       - linked list(an integer array) that is allocated
16*c5db241fSHong Zhang   output Parameters:
17*c5db241fSHong Zhang     lnk       - the linked list with all values set as lnk_int
18*c5db241fSHong Zhang */
19*c5db241fSHong Zhang #define LNKLISTINITIALIZE(lnk_init,nlnk,lnk){\
20*c5db241fSHong Zhang   int _i;\
21*c5db241fSHong Zhang   for (_i=0; _i<nlnk; _i++) lnk[_i] = lnk_init;\
22*c5db241fSHong Zhang }
23*c5db241fSHong Zhang 
24*c5db241fSHong Zhang /*
251c239cc6SHong Zhang   Add a index set into a sorted linked list
26a50f8bf6SHong Zhang   Input Parameters:
271c239cc6SHong Zhang     nidx      - number of input indices
281c239cc6SHong Zhang     indices   - interger array
29*c5db241fSHong Zhang     lnk_head  - the header of the list
30*c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
311c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
32a50f8bf6SHong Zhang   output Parameters:
331c239cc6SHong Zhang     nlnk      - number of newly added indices
341c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
351c239cc6SHong Zhang */
36*c5db241fSHong Zhang #define LNKLISTADD(nidx,indices,lnk_head,lnk_init,nlnk,lnk){\
37*c5db241fSHong Zhang   int _k,_entry,_lidx=lnk_head,_idx;\
38a50f8bf6SHong Zhang   nlnk = 0;\
39a50f8bf6SHong Zhang   _k=nidx;\
40a50f8bf6SHong Zhang   while (_k){/* assume indices are almost in increasing order, starting from its end saves computation */\
41a50f8bf6SHong Zhang     _entry = indices[--_k];\
42*c5db241fSHong Zhang     if (lnk[_entry] == lnk_init) { /* new col */\
43a50f8bf6SHong Zhang       do {\
44a50f8bf6SHong Zhang         _idx = _lidx;\
45a50f8bf6SHong Zhang         _lidx  = lnk[_idx];\
46a50f8bf6SHong Zhang       } while (_entry > _lidx);\
47a50f8bf6SHong Zhang       lnk[_idx] = _entry;\
48a50f8bf6SHong Zhang       lnk[_entry] = _lidx;\
49a50f8bf6SHong Zhang       nlnk++;\
50a50f8bf6SHong Zhang     }\
51a50f8bf6SHong Zhang   }\
521c239cc6SHong Zhang }
53*c5db241fSHong Zhang /*
54*c5db241fSHong Zhang   Copy data on the list into an array, then initialize the list
55*c5db241fSHong Zhang   Input Parameters:
56*c5db241fSHong Zhang     lnk_head  - the header of the list
57*c5db241fSHong Zhang     lnk_init  - the initial index value indicating the entry in the list is not set yet
58*c5db241fSHong Zhang     nlnk      - number of data on the list to be copied
59*c5db241fSHong Zhang     lnk       - linked list
60*c5db241fSHong Zhang   output Parameters:
61*c5db241fSHong Zhang     indices   - array that contains the copied data
62*c5db241fSHong Zhang */
63*c5db241fSHong Zhang #define LNKLISTCLEAR(lnk_head,lnk_init,nlnk,lnk,indices){\
64*c5db241fSHong Zhang   int _j,_idx=lnk_head,_idx0;\
65*c5db241fSHong Zhang   for (_j=0; _j<nlnk; _j++){\
66*c5db241fSHong Zhang     _idx0 = _idx; _idx = lnk[_idx0];\
67*c5db241fSHong Zhang     *(indices+_j) = _idx;\
68*c5db241fSHong Zhang     lnk[_idx0] = lnk_init;\
69*c5db241fSHong Zhang   }\
70*c5db241fSHong Zhang   lnk[_idx] = lnk_init;\
71*c5db241fSHong Zhang }
721c239cc6SHong Zhang 
732216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
742216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
752216b3a4SKris Buschelman 
76c1f4806aSKris Buschelman #undef __FUNCT__
77c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
785c66b693SKris Buschelman /*@
795c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
8094e3eecaSKris Buschelman 
815c66b693SKris Buschelman    Collective on Mat
82d50806bdSBarry Smith 
835c66b693SKris Buschelman    Input Parameters:
845c66b693SKris Buschelman +  A - the left matrix
851c741599SHong Zhang .  B - the right matrix
861c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
87*c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
885c66b693SKris Buschelman 
895c66b693SKris Buschelman    Output Parameters:
905c66b693SKris Buschelman .  C - the product matrix
915c66b693SKris Buschelman 
925c66b693SKris Buschelman    Notes:
935c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
945c66b693SKris Buschelman 
954d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
964d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
975c66b693SKris Buschelman 
985c66b693SKris Buschelman    Level: intermediate
995c66b693SKris Buschelman 
1005c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
1015c66b693SKris Buschelman @*/
1021c741599SHong Zhang int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1032d09714cSHong Zhang {
104d50806bdSBarry Smith   int  ierr;
1052d09714cSHong Zhang 
106d50806bdSBarry Smith   PetscFunctionBegin;
1074482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
108c9780b6fSBarry Smith   PetscValidType(A,1);
1095c66b693SKris Buschelman   MatPreallocated(A);
1105c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1115c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1124482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
113c9780b6fSBarry Smith   PetscValidType(B,2);
1145c66b693SKris Buschelman   MatPreallocated(B);
1155c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
1165c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1174482741eSBarry Smith   PetscValidPointer(C,3);
1185c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
119d50806bdSBarry Smith 
120*c5db241fSHong Zhang   if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill);
121*c5db241fSHong Zhang 
1226284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1231c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
1246284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1254d3841fdSKris Buschelman 
1266284ec50SHong Zhang   PetscFunctionReturn(0);
1276284ec50SHong Zhang }
1286284ec50SHong Zhang 
1296284ec50SHong Zhang #undef __FUNCT__
1306284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
1311c741599SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
1322d09714cSHong Zhang {
133a50f8bf6SHong Zhang   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq;
1342d09714cSHong Zhang   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
135a50f8bf6SHong Zhang   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap;
1362d09714cSHong Zhang   IS            isrow,iscol;
1376284ec50SHong Zhang 
1386284ec50SHong Zhang   PetscFunctionBegin;
1392d09714cSHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
1402d09714cSHong Zhang   start = a->cstart;
1412d09714cSHong Zhang   cmap  = a->garray;
1422d09714cSHong Zhang   nzA   = a->A->n;
1432d09714cSHong Zhang   nzB   = a->B->n;
1442d09714cSHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
1452d09714cSHong Zhang   ncols = 0;
1462d09714cSHong Zhang   for (i=0; i<nzB; i++) {
1472d09714cSHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
1482d09714cSHong Zhang     else break;
1492d09714cSHong Zhang   }
1502d09714cSHong Zhang   imark = i;
1512d09714cSHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
1522d09714cSHong Zhang   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
1532d09714cSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
1542d09714cSHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
1552d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
156a50f8bf6SHong Zhang   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,scall,&bseq);CHKERRQ(ierr);
1572d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
1582d09714cSHong Zhang   B_seq = bseq[0];
1596284ec50SHong Zhang 
1602d09714cSHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
1612d09714cSHong Zhang   start = a->rstart; end = a->rend;
1622d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
163a50f8bf6SHong Zhang   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,scall,&aseq);CHKERRQ(ierr);
1642d09714cSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
1652d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
1662d09714cSHong Zhang   A_seq = aseq[0];
1672d09714cSHong Zhang 
1682d09714cSHong Zhang   /* compute C_seq = A_seq * B_seq */
1691c741599SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq,B_seq,scall,fill,&C_seq);CHKERRQ(ierr);
1702d09714cSHong Zhang   /*
1712d09714cSHong Zhang   int rank;
1722d09714cSHong Zhang   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
1732d09714cSHong Zhang   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
1742d09714cSHong Zhang   */
1752d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
1762d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
1772d09714cSHong Zhang 
178a50f8bf6SHong Zhang   /* create mpi matrix C by concatinating C_seq */
179a50f8bf6SHong Zhang   ierr = MatMerge(A->comm,C_seq,C);CHKERRQ(ierr);
1802d09714cSHong Zhang 
181d50806bdSBarry Smith   PetscFunctionReturn(0);
182d50806bdSBarry Smith }
1835c66b693SKris Buschelman 
1845c66b693SKris Buschelman #undef __FUNCT__
1855c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
1861c741599SHong Zhang int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
1875c66b693SKris Buschelman   int ierr;
1884d3841fdSKris Buschelman   char symfunct[80],numfunct[80];
189*c5db241fSHong Zhang   int (*symbolic)(Mat,Mat,PetscReal,Mat*),(*numeric)(Mat,Mat,Mat);
1905c66b693SKris Buschelman 
1915c66b693SKris Buschelman   PetscFunctionBegin;
1924d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
1934d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
1944d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
1954d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1964d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
1975c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
1984d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
1994d3841fdSKris Buschelman 
2004d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
2015c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2024d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2034d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2044d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2054d3841fdSKris Buschelman 
206*c5db241fSHong Zhang   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
2075c66b693SKris Buschelman   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
2085c66b693SKris Buschelman   PetscFunctionReturn(0);
2095c66b693SKris Buschelman }
2105c66b693SKris Buschelman 
211c1f4806aSKris Buschelman #undef __FUNCT__
212c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
2135c66b693SKris Buschelman /*@
2145c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
2155c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
2165c66b693SKris Buschelman 
2175c66b693SKris Buschelman    Collective on Mat
2185c66b693SKris Buschelman 
2195c66b693SKris Buschelman    Input Parameters:
2205c66b693SKris Buschelman +  A - the left matrix
221*c5db241fSHong Zhang .  B - the right matrix
222*c5db241fSHong Zhang -  fill - expected fill as ratio of nnz(C)/(nnz(A) + nnz(B))
2235c66b693SKris Buschelman 
2245c66b693SKris Buschelman    Output Parameters:
2255c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
2265c66b693SKris Buschelman 
2275c66b693SKris Buschelman    Notes:
2284d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
2295c66b693SKris Buschelman 
2304d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
2315c66b693SKris Buschelman 
2325c66b693SKris Buschelman    Level: intermediate
2335c66b693SKris Buschelman 
2345c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
2355c66b693SKris Buschelman @*/
236*c5db241fSHong Zhang int MatMatMultSymbolic(Mat A,Mat B,PetscReal fill,Mat *C) {
2375c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
2385c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
2395c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
2405c66b693SKris Buschelman   int  ierr;
2414d3841fdSKris Buschelman   char symfunct[80];
242*c5db241fSHong Zhang   int  (*symbolic)(Mat,Mat,PetscReal,Mat *);
2435c66b693SKris Buschelman 
2445c66b693SKris Buschelman   PetscFunctionBegin;
2454482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
246c9780b6fSBarry Smith   PetscValidType(A,1);
2475c66b693SKris Buschelman   MatPreallocated(A);
2485c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2495c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2505c66b693SKris Buschelman 
2514482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
252c9780b6fSBarry Smith   PetscValidType(B,2);
2535c66b693SKris Buschelman   MatPreallocated(B);
2545c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2555c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2564482741eSBarry Smith   PetscValidPointer(C,3);
2574482741eSBarry Smith 
2585c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
259*c5db241fSHong Zhang   if (fill <=0) SETERRQ1(PETSC_ERR_ARG_SIZ,"fill=%d must be > 0",fill);
2605c66b693SKris Buschelman 
2614d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2624d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2634d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2644d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2654d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2664d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2674d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
268*c5db241fSHong Zhang   ierr = (*symbolic)(A,B,fill,C);CHKERRQ(ierr);
2695c66b693SKris Buschelman 
2705c66b693SKris Buschelman   PetscFunctionReturn(0);
2715c66b693SKris Buschelman }
27258c24d83SHong Zhang 
27358c24d83SHong Zhang #undef __FUNCT__
27458c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
275*c5db241fSHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
27658c24d83SHong Zhang {
27758c24d83SHong Zhang   int            ierr;
27858c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
27958c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
28058c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
2811c239cc6SHong Zhang   int            *ci,*cj,*lnk,idx0,idx;
2825c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
283*c5db241fSHong Zhang   int            i,j,anzi,brow,bnzj,cnzi,nlnk,lnk_init=-1,nspacedouble=0;
28458c24d83SHong Zhang   MatScalar      *ca;
28558c24d83SHong Zhang 
28658c24d83SHong Zhang   PetscFunctionBegin;
2875c66b693SKris Buschelman   /* Start timers */
28858c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
28958c24d83SHong Zhang   /* Set up */
29058c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
29158c24d83SHong Zhang   /* free space for accumulating nonzero column info */
29258c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
29358c24d83SHong Zhang   ci[0] = 0;
29458c24d83SHong Zhang 
29558c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
296*c5db241fSHong Zhang   LNKLISTINITIALIZE(lnk_init,bn,lnk);
29758c24d83SHong Zhang 
298*c5db241fSHong Zhang   /* Initial FreeSpace size is fill*(nnz(A)+nnz(B)) */
299*c5db241fSHong Zhang   ierr = GetMoreSpace(fill*(ai[am]+bi[bm]),&free_space);CHKERRQ(ierr);
30058c24d83SHong Zhang   current_space = free_space;
30158c24d83SHong Zhang 
30258c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
30358c24d83SHong Zhang   for (i=0;i<am;i++) {
30458c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
30558c24d83SHong Zhang     cnzi = 0;
30658c24d83SHong Zhang     lnk[bn] = bn;
3072d09714cSHong Zhang     j       = anzi;
3082d09714cSHong Zhang     aj      = a->j + ai[i];
3092d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
3102d09714cSHong Zhang       j--;
3112d09714cSHong Zhang       brow = *(aj + j);
31258c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
31358c24d83SHong Zhang       bjj  = bj + bi[brow];
3141c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
315*c5db241fSHong Zhang       LNKLISTADD(bnzj,bjj,bn,lnk_init,nlnk,lnk);
3161c239cc6SHong Zhang       cnzi += nlnk;
31758c24d83SHong Zhang     }
31858c24d83SHong Zhang 
31958c24d83SHong Zhang     /* If free space is not available, make more free space */
32058c24d83SHong Zhang     /* Double the amount of total space in the list */
32158c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
322*c5db241fSHong Zhang       /* printf("MatMatMult_Symbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);*/
32358c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
324*c5db241fSHong Zhang       nspacedouble++;
32558c24d83SHong Zhang     }
32658c24d83SHong Zhang 
327*c5db241fSHong Zhang     /* Copy data into free space, then initialize lnk */
328*c5db241fSHong Zhang     LNKLISTCLEAR(bn,lnk_init,cnzi,lnk,current_space->array);
329*c5db241fSHong Zhang     current_space->array += cnzi;
33058c24d83SHong Zhang 
33158c24d83SHong Zhang     current_space->local_used      += cnzi;
33258c24d83SHong Zhang     current_space->local_remaining -= cnzi;
33358c24d83SHong Zhang 
33458c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
33558c24d83SHong Zhang   }
33658c24d83SHong Zhang 
33758c24d83SHong Zhang   /* Column indices are in the list of free space */
33858c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
33958c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
34058c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
34158c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
34258c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
34358c24d83SHong Zhang 
34458c24d83SHong Zhang   /* Allocate space for ca */
34558c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
34658c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
34758c24d83SHong Zhang 
34858c24d83SHong Zhang   /* put together the new matrix */
34958c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
35058c24d83SHong Zhang 
35158c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
35258c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
35358c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
35458c24d83SHong Zhang   c->freedata = PETSC_TRUE;
35558c24d83SHong Zhang   c->nonew    = 0;
35658c24d83SHong Zhang 
35758c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
358*c5db241fSHong Zhang   PetscLogInfo((PetscObject)(*C),"Number of calls to GetMoreSpace(): %d\n",nspacedouble);
35958c24d83SHong Zhang   PetscFunctionReturn(0);
36058c24d83SHong Zhang }
361d50806bdSBarry Smith 
362c1f4806aSKris Buschelman #undef __FUNCT__
363c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3645c66b693SKris Buschelman /*@
3655c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3665c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3675c66b693SKris Buschelman 
3685c66b693SKris Buschelman    Collective on Mat
3695c66b693SKris Buschelman 
3705c66b693SKris Buschelman    Input Parameters:
3715c66b693SKris Buschelman +  A - the left matrix
3725c66b693SKris Buschelman -  B - the right matrix
3735c66b693SKris Buschelman 
3745c66b693SKris Buschelman    Output Parameters:
3755c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3765c66b693SKris Buschelman 
3775c66b693SKris Buschelman    Notes:
3785c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3795c66b693SKris Buschelman 
3805c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3815c66b693SKris Buschelman 
3825c66b693SKris Buschelman    Level: intermediate
3835c66b693SKris Buschelman 
3845c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3855c66b693SKris Buschelman @*/
3865c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
3875c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
3885c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
3895c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
3905c66b693SKris Buschelman   int ierr;
3914d3841fdSKris Buschelman   char numfunct[80];
3925c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
3935c66b693SKris Buschelman 
3945c66b693SKris Buschelman   PetscFunctionBegin;
3955c66b693SKris Buschelman 
3964482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
397c9780b6fSBarry Smith   PetscValidType(A,1);
3985c66b693SKris Buschelman   MatPreallocated(A);
3995c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4005c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4015c66b693SKris Buschelman 
4024482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
403c9780b6fSBarry Smith   PetscValidType(B,2);
4045c66b693SKris Buschelman   MatPreallocated(B);
4055c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4065c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4075c66b693SKris Buschelman 
4084482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
409c9780b6fSBarry Smith   PetscValidType(C,3);
4105c66b693SKris Buschelman   MatPreallocated(C);
4115c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4125c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4135c66b693SKris Buschelman 
4145c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
4155c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
4165c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
4175c66b693SKris Buschelman 
4184d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
4194d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
4204d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
4214d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4224d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
4234d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4244d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
4254d3841fdSKris Buschelman 
4265c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
4275c66b693SKris Buschelman 
4285c66b693SKris Buschelman   PetscFunctionReturn(0);
4295c66b693SKris Buschelman }
4305c66b693SKris Buschelman 
431d50806bdSBarry Smith #undef __FUNCT__
43294e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
43394e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
434d50806bdSBarry Smith {
43594e3eecaSKris Buschelman   int        ierr,flops=0;
436d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
437d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
438d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
439d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4405c66b693SKris Buschelman   int        am=A->M,cn=C->N;
44194e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
442d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
443d50806bdSBarry Smith 
444d50806bdSBarry Smith   PetscFunctionBegin;
445d50806bdSBarry Smith 
4465c66b693SKris Buschelman   /* Start timers */
447d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
44894e3eecaSKris Buschelman 
449d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
450d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
451d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
452d50806bdSBarry Smith   /* Traverse A row-wise. */
453d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
454d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
455d50806bdSBarry Smith   for (i=0;i<am;i++) {
456d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
457d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
458d50806bdSBarry Smith       brow = *aj++;
459d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
460d50806bdSBarry Smith       bjj  = bj + bi[brow];
461d50806bdSBarry Smith       baj  = ba + bi[brow];
462d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
463d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
464d50806bdSBarry Smith       }
465d50806bdSBarry Smith       flops += 2*bnzi;
466d50806bdSBarry Smith       aa++;
467d50806bdSBarry Smith     }
468d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
469d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
470d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
471d50806bdSBarry Smith       ca[j] = temp[cj[j]];
472d50806bdSBarry Smith       temp[cj[j]] = 0.0;
473d50806bdSBarry Smith     }
474d50806bdSBarry Smith     ca += cnzi;
475d50806bdSBarry Smith     cj += cnzi;
476d50806bdSBarry Smith   }
477716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
478716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
479716bacf3SKris Buschelman 
480d50806bdSBarry Smith   /* Free temp */
481d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
482d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
483d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
484d50806bdSBarry Smith   PetscFunctionReturn(0);
485d50806bdSBarry Smith }
486d50806bdSBarry Smith 
487d50806bdSBarry Smith #undef __FUNCT__
4885c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
4895c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
490d50806bdSBarry Smith   int ierr;
491d50806bdSBarry Smith 
492d50806bdSBarry Smith   PetscFunctionBegin;
4935c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
4945c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
495d50806bdSBarry Smith   }
4965c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
4975c66b693SKris Buschelman                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
4985c66b693SKris Buschelman                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
4995c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
5005c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
50194e3eecaSKris Buschelman   }
5025c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
5035c66b693SKris Buschelman                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
5045c66b693SKris Buschelman                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
50594e3eecaSKris Buschelman   PetscFunctionReturn(0);
50694e3eecaSKris Buschelman }
507