xref: /petsc/src/mat/impls/aij/seq/matmatmult.c (revision 1c741599245a8e1308c682ee11969ffb033c0bea)
1d50806bdSBarry Smith /*
22c9ce0e5SKris Buschelman   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
3d50806bdSBarry Smith           C = A * B
4d50806bdSBarry Smith */
5d50806bdSBarry Smith 
6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/
770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h"
82d09714cSHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"
9d50806bdSBarry Smith 
101c239cc6SHong Zhang /*
111c239cc6SHong Zhang   Add a index set into a sorted linked list
121c239cc6SHong Zhang   input:
131c239cc6SHong Zhang     nidx      - number of input indices
141c239cc6SHong Zhang     indices   - interger array
151c239cc6SHong Zhang     idx_head  - the header of the list
161c239cc6SHong Zhang     idx_unset - the value indicating the entry in the list is not set yet
171c239cc6SHong Zhang     lnk       - linked list(an integer array) that is created
181c239cc6SHong Zhang   output:
191c239cc6SHong Zhang     nlnk      - number of newly added indices
201c239cc6SHong Zhang     lnk       - the sorted(increasing order) linked list containing new and non-redundate entries from indices
211c239cc6SHong Zhang  */
221c239cc6SHong Zhang #undef __FUNCT__
231c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd"
241c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk)
251c239cc6SHong Zhang {
261c239cc6SHong Zhang   int i,idx,lidx,entry,n;
271c239cc6SHong Zhang 
281c239cc6SHong Zhang   PetscFunctionBegin;
291c239cc6SHong Zhang   n    = 0;
301c239cc6SHong Zhang   lidx = idx_head;
311c239cc6SHong Zhang   i    = nidx;
321c239cc6SHong Zhang   while (i){
332d09714cSHong Zhang     /* assume indices are almost in increasing order, starting from its end saves computation */
341c239cc6SHong Zhang     entry = indices[--i];
351c239cc6SHong Zhang     if (lnk[entry] == idx_unset) { /* new entry */
361c239cc6SHong Zhang       do {
371c239cc6SHong Zhang         idx = lidx;
381c239cc6SHong Zhang         lidx  = lnk[idx];
391c239cc6SHong Zhang       } while (entry > lidx);
401c239cc6SHong Zhang       lnk[idx] = entry;
411c239cc6SHong Zhang       lnk[entry] = lidx;
421c239cc6SHong Zhang       n++;
431c239cc6SHong Zhang     }
441c239cc6SHong Zhang   }
451c239cc6SHong Zhang   *nlnk = n;
461c239cc6SHong Zhang   PetscFunctionReturn(0);
471c239cc6SHong Zhang }
481c239cc6SHong Zhang 
491c239cc6SHong Zhang 
502216b3a4SKris Buschelman static int logkey_matmatmult          = 0;
512216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0;
522216b3a4SKris Buschelman static int logkey_matmatmult_numeric  = 0;
532216b3a4SKris Buschelman 
54c1f4806aSKris Buschelman #undef __FUNCT__
55c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult"
565c66b693SKris Buschelman /*@
575c66b693SKris Buschelman    MatMatMult - Performs Matrix-Matrix Multiplication C=A*B.
5894e3eecaSKris Buschelman 
595c66b693SKris Buschelman    Collective on Mat
60d50806bdSBarry Smith 
615c66b693SKris Buschelman    Input Parameters:
625c66b693SKris Buschelman +  A - the left matrix
63*1c741599SHong Zhang .  B - the right matrix
64*1c741599SHong Zhang .  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
65*1c741599SHong Zhang -  fill - expected fill as ratio of nonzeros in product matrix/nonzeros in original matrix
665c66b693SKris Buschelman 
675c66b693SKris Buschelman    Output Parameters:
685c66b693SKris Buschelman .  C - the product matrix
695c66b693SKris Buschelman 
705c66b693SKris Buschelman    Notes:
715c66b693SKris Buschelman    C will be created and must be destroyed by the user with MatDestroy().
725c66b693SKris Buschelman 
734d3841fdSKris Buschelman    This routine is currently only implemented for pairs of SeqAIJ matrices and classes
744d3841fdSKris Buschelman    which inherit from SeqAIJ.  C will be of type MATSEQAIJ.
755c66b693SKris Buschelman 
765c66b693SKris Buschelman    Level: intermediate
775c66b693SKris Buschelman 
785c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric()
795c66b693SKris Buschelman @*/
80*1c741599SHong Zhang int MatMatMult(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
812d09714cSHong Zhang {
82d50806bdSBarry Smith   int  ierr;
832d09714cSHong Zhang 
84d50806bdSBarry Smith   PetscFunctionBegin;
854482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
86c9780b6fSBarry Smith   PetscValidType(A,1);
875c66b693SKris Buschelman   MatPreallocated(A);
885c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
895c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
90d50806bdSBarry Smith 
914482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
92c9780b6fSBarry Smith   PetscValidType(B,2);
935c66b693SKris Buschelman   MatPreallocated(B);
945c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
955c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
96d50806bdSBarry Smith 
974482741eSBarry Smith   PetscValidPointer(C,3);
984482741eSBarry Smith 
995c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
100d50806bdSBarry Smith 
1016284ec50SHong Zhang   ierr = PetscLogEventBegin(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
102*1c741599SHong Zhang   ierr = (*A->ops->matmult)(A,B,scall,fill,C);CHKERRQ(ierr);
1036284ec50SHong Zhang   ierr = PetscLogEventEnd(MAT_MatMult,A,B,0,0);CHKERRQ(ierr);
1044d3841fdSKris Buschelman 
1056284ec50SHong Zhang   PetscFunctionReturn(0);
1066284ec50SHong Zhang }
1076284ec50SHong Zhang 
1086284ec50SHong Zhang #undef __FUNCT__
1096284ec50SHong Zhang #define __FUNCT__ "MatMatMult_MPIAIJ_MPIAIJ"
110*1c741599SHong Zhang int MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C)
1112d09714cSHong Zhang {
1122d09714cSHong Zhang   Mat           *aseq,*bseq,A_seq=PETSC_NULL,B_seq=PETSC_NULL,*cseq,C_seq,C_mpi;
1132d09714cSHong Zhang   Mat_MPIAIJ    *a = (Mat_MPIAIJ*)A->data;
1142d09714cSHong Zhang   Mat_SeqAIJ    *c;
1152d09714cSHong Zhang   MatScalar     *ca;
1162d09714cSHong Zhang   int           ierr,*idx,i,start,end,ncols,imark,nzA,nzB,*cmap,*ci,*cj,grow,*d_nnz,*o_nnz;
1172d09714cSHong Zhang   IS            isrow,iscol;
1186284ec50SHong Zhang 
1196284ec50SHong Zhang   PetscFunctionBegin;
1202d09714cSHong Zhang   /* create a seq matrix B_seq = submatrix of B by taking rows of B that equal to nonzero col of A */
1212d09714cSHong Zhang   start = a->cstart;
1222d09714cSHong Zhang   cmap  = a->garray;
1232d09714cSHong Zhang   nzA   = a->A->n;
1242d09714cSHong Zhang   nzB   = a->B->n;
1252d09714cSHong Zhang   ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
1262d09714cSHong Zhang   ncols = 0;
1272d09714cSHong Zhang   for (i=0; i<nzB; i++) {
1282d09714cSHong Zhang     if (cmap[i] < start) idx[ncols++] = cmap[i];
1292d09714cSHong Zhang     else break;
1302d09714cSHong Zhang   }
1312d09714cSHong Zhang   imark = i;
1322d09714cSHong Zhang   for (i=0; i<nzA; i++) idx[ncols++] = start + i;
1332d09714cSHong Zhang   for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
1342d09714cSHong Zhang   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrow);CHKERRQ(ierr); /* isrow of B = iscol of A! */
1352d09714cSHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
1362d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscol);CHKERRQ(ierr);
137*1c741599SHong Zhang   ierr = MatGetSubMatrices(B,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&bseq);CHKERRQ(ierr);/* reuse */
1382d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
1392d09714cSHong Zhang   B_seq = bseq[0];
1406284ec50SHong Zhang 
1412d09714cSHong Zhang   /*  create a seq matrix A_seq = submatrix of A by taking all local rows of A */
1422d09714cSHong Zhang   start = a->rstart; end = a->rend;
1432d09714cSHong Zhang   ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&iscol);CHKERRQ(ierr); /* isrow of A = iscol */
144*1c741599SHong Zhang   ierr = MatGetSubMatrices(A,1,&iscol,&isrow,MAT_INITIAL_MATRIX,&aseq);CHKERRQ(ierr); /* reuse */
1452d09714cSHong Zhang   ierr = ISDestroy(isrow);CHKERRQ(ierr);
1462d09714cSHong Zhang   ierr = ISDestroy(iscol);CHKERRQ(ierr);
1472d09714cSHong Zhang   A_seq = aseq[0];
1482d09714cSHong Zhang 
1492d09714cSHong Zhang   /* compute C_seq = A_seq * B_seq */
150*1c741599SHong Zhang   ierr = MatMatMult_SeqAIJ_SeqAIJ(A_seq, B_seq, scall, fill,&C_seq);CHKERRQ(ierr);
1512d09714cSHong Zhang   /*
1522d09714cSHong Zhang   int rank;
1532d09714cSHong Zhang   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
1542d09714cSHong Zhang   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] A_seq: %d, %d; B_seq: %d, %d; C_seq: %d, %d;\n",rank,A_seq->m,A_seq->n,B_seq->m,B_seq->n,C_seq->m,C_seq->n);
1552d09714cSHong Zhang   */
1562d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&aseq);CHKERRQ(ierr);
1572d09714cSHong Zhang   ierr = MatDestroyMatrices(1,&bseq);CHKERRQ(ierr);
1582d09714cSHong Zhang 
1592d09714cSHong Zhang   /* create a mpi matrix C_mpi that has C_seq as its local entries */
1602d09714cSHong Zhang   ierr = MatCreate(A->comm,C_seq->m,PETSC_DECIDE,PETSC_DECIDE,C_seq->N,&C_mpi);CHKERRQ(ierr);
1612d09714cSHong Zhang   ierr = MatSetType(C_mpi,MATMPIAIJ);CHKERRQ(ierr);
1622d09714cSHong Zhang 
1632d09714cSHong Zhang   c  = (Mat_SeqAIJ*)C_seq->data;
1642d09714cSHong Zhang   ci = c->i; cj = c->j; ca = c->a;
1652d09714cSHong Zhang   ierr = PetscMalloc((2*C_seq->m+1)*sizeof(int),&d_nnz);CHKERRQ(ierr);
1662d09714cSHong Zhang   o_nnz = d_nnz + C_seq->m;
1672d09714cSHong Zhang   nzA   = end-start;    /* max nonezero cols of the local diagonal part of C_mpi */
1682d09714cSHong Zhang   nzB   = C_seq->n-nzA; /* max nonezero cols of the local off-diagonal part of C_mpi */
1692d09714cSHong Zhang   for (i=0; i< C_seq->m; i++){
1702d09714cSHong Zhang     ncols = ci[i+1] - ci[i];
1712d09714cSHong Zhang     d_nnz[i] = PetscMin(ncols,nzA);
1722d09714cSHong Zhang     o_nnz[i] = PetscMin(ncols,nzB);
1732d09714cSHong Zhang   }
1742d09714cSHong Zhang   ierr = MatMPIAIJSetPreallocation(C_mpi,PETSC_DECIDE,d_nnz,PETSC_DECIDE,o_nnz);CHKERRQ(ierr);
1752d09714cSHong Zhang   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1762d09714cSHong Zhang 
1772d09714cSHong Zhang   /* set row values of C_mpi */
1782d09714cSHong Zhang   for (i=0; i< C_seq->m; i++){
1792d09714cSHong Zhang     grow  = start + i;
1802d09714cSHong Zhang     ncols = ci[i+1] - ci[i];
181*1c741599SHong Zhang     ierr = MatSetValues_MPIAIJ(C_mpi,1,&grow,ncols,cj+ci[i],ca+ci[i],INSERT_VALUES);CHKERRQ(ierr);
1822d09714cSHong Zhang   }
1832d09714cSHong Zhang   ierr = MatAssemblyBegin(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1842d09714cSHong Zhang   ierr = MatAssemblyEnd(C_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1852d09714cSHong Zhang   ierr = MatDestroy(C_seq);CHKERRQ(ierr);
1862d09714cSHong Zhang 
1872d09714cSHong Zhang   *C = C_mpi;
188d50806bdSBarry Smith   PetscFunctionReturn(0);
189d50806bdSBarry Smith }
1905c66b693SKris Buschelman 
1915c66b693SKris Buschelman #undef __FUNCT__
1925c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ"
193*1c741599SHong Zhang int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) {
1945c66b693SKris Buschelman   int ierr;
1954d3841fdSKris Buschelman   char symfunct[80],numfunct[80];
1965c66b693SKris Buschelman   int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat);
1975c66b693SKris Buschelman 
1985c66b693SKris Buschelman   PetscFunctionBegin;
1994d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2004d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2014d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2024d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2034d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2045c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2054d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2064d3841fdSKris Buschelman 
2074d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
2085c66b693SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2094d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2104d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
2114d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2124d3841fdSKris Buschelman 
2135c66b693SKris Buschelman   ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
2145c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2155c66b693SKris Buschelman   ierr = (*numeric)(A,B,*C);CHKERRQ(ierr);
2165c66b693SKris Buschelman   ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr);
2175c66b693SKris Buschelman   PetscFunctionReturn(0);
2185c66b693SKris Buschelman }
2195c66b693SKris Buschelman 
220c1f4806aSKris Buschelman #undef __FUNCT__
221c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic"
2225c66b693SKris Buschelman /*@
2235c66b693SKris Buschelman    MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure
2245c66b693SKris Buschelman    of the matrix-matrix product C=A*B.  Call this routine before calling MatMatMultNumeric().
2255c66b693SKris Buschelman 
2265c66b693SKris Buschelman    Collective on Mat
2275c66b693SKris Buschelman 
2285c66b693SKris Buschelman    Input Parameters:
2295c66b693SKris Buschelman +  A - the left matrix
2305c66b693SKris Buschelman -  B - the right matrix
2315c66b693SKris Buschelman 
2325c66b693SKris Buschelman    Output Parameters:
2335c66b693SKris Buschelman .  C - the matrix containing the ij structure of product matrix
2345c66b693SKris Buschelman 
2355c66b693SKris Buschelman    Notes:
2364d3841fdSKris Buschelman    C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy().
2375c66b693SKris Buschelman 
2384d3841fdSKris Buschelman    This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ.
2395c66b693SKris Buschelman 
2405c66b693SKris Buschelman    Level: intermediate
2415c66b693SKris Buschelman 
2425c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric()
2435c66b693SKris Buschelman @*/
2445c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) {
2455c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
2465c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
2475c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */
2485c66b693SKris Buschelman   int  ierr;
2494d3841fdSKris Buschelman   char symfunct[80];
2505c66b693SKris Buschelman   int  (*symbolic)(Mat,Mat,Mat *);
2515c66b693SKris Buschelman 
2525c66b693SKris Buschelman   PetscFunctionBegin;
2534482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
254c9780b6fSBarry Smith   PetscValidType(A,1);
2555c66b693SKris Buschelman   MatPreallocated(A);
2565c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2575c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2585c66b693SKris Buschelman 
2594482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
260c9780b6fSBarry Smith   PetscValidType(B,2);
2615c66b693SKris Buschelman   MatPreallocated(B);
2625c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2635c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2644482741eSBarry Smith   PetscValidPointer(C,3);
2654482741eSBarry Smith 
2665c66b693SKris Buschelman 
2675c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
2685c66b693SKris Buschelman 
2694d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
2704d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
2714d3841fdSKris Buschelman   ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr);
2724d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2734d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
2744d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr);
2754d3841fdSKris Buschelman   if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
2765c66b693SKris Buschelman   ierr = (*symbolic)(A,B,C);CHKERRQ(ierr);
2775c66b693SKris Buschelman 
2785c66b693SKris Buschelman   PetscFunctionReturn(0);
2795c66b693SKris Buschelman }
28058c24d83SHong Zhang 
28158c24d83SHong Zhang #undef __FUNCT__
28258c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ"
28358c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C)
28458c24d83SHong Zhang {
28558c24d83SHong Zhang   int            ierr;
28658c24d83SHong Zhang   FreeSpaceList  free_space=PETSC_NULL,current_space=PETSC_NULL;
28758c24d83SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c;
28858c24d83SHong Zhang   int            *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj;
2891c239cc6SHong Zhang   int            *ci,*cj,*lnk,idx0,idx;
2905c66b693SKris Buschelman   int            am=A->M,bn=B->N,bm=B->M;
291bf812060SBarry Smith   int            i,j,anzi,brow,bnzj,cnzi,nlnk;
29258c24d83SHong Zhang   MatScalar      *ca;
29358c24d83SHong Zhang 
29458c24d83SHong Zhang   PetscFunctionBegin;
2955c66b693SKris Buschelman   /* Start timers */
29658c24d83SHong Zhang   ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
29758c24d83SHong Zhang   /* Set up */
29858c24d83SHong Zhang   /* Allocate ci array, arrays for fill computation and */
29958c24d83SHong Zhang   /* free space for accumulating nonzero column info */
30058c24d83SHong Zhang   ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr);
30158c24d83SHong Zhang   ci[0] = 0;
30258c24d83SHong Zhang 
30358c24d83SHong Zhang   ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr);
30458c24d83SHong Zhang   for (i=0; i<bn; i++) lnk[i] = -1;
30558c24d83SHong Zhang 
30658c24d83SHong Zhang   /* Initial FreeSpace size is nnz(B)=4*bi[bm] */
30758c24d83SHong Zhang   ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr);
30858c24d83SHong Zhang   current_space = free_space;
30958c24d83SHong Zhang 
31058c24d83SHong Zhang   /* Determine symbolic info for each row of the product: */
31158c24d83SHong Zhang   for (i=0;i<am;i++) {
31258c24d83SHong Zhang     anzi = ai[i+1] - ai[i];
31358c24d83SHong Zhang     cnzi = 0;
31458c24d83SHong Zhang     lnk[bn] = bn;
3152d09714cSHong Zhang     j       = anzi;
3162d09714cSHong Zhang     aj      = a->j + ai[i];
3172d09714cSHong Zhang     while (j){/* assume cols are almost in increasing order, starting from its end saves computation */
3182d09714cSHong Zhang       j--;
3192d09714cSHong Zhang       brow = *(aj + j);
32058c24d83SHong Zhang       bnzj = bi[brow+1] - bi[brow];
32158c24d83SHong Zhang       bjj  = bj + bi[brow];
3221c239cc6SHong Zhang       /* add non-zero cols of B into the sorted linked list lnk */
3231c239cc6SHong Zhang       ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr);
3241c239cc6SHong Zhang       cnzi += nlnk;
32558c24d83SHong Zhang     }
32658c24d83SHong Zhang 
32758c24d83SHong Zhang     /* If free space is not available, make more free space */
32858c24d83SHong Zhang     /* Double the amount of total space in the list */
32958c24d83SHong Zhang     if (current_space->local_remaining<cnzi) {
330*1c741599SHong Zhang       printf("MatMatMult_Symbolic_SeqAIJ_SeqAIJ()...%d -th row, double space ...\n",i);
33158c24d83SHong Zhang       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
33258c24d83SHong Zhang     }
33358c24d83SHong Zhang 
33458c24d83SHong Zhang     /* Copy data into free space, and zero out denserow and lnk */
33558c24d83SHong Zhang     idx = bn;
33658c24d83SHong Zhang     for (j=0; j<cnzi; j++){
33758c24d83SHong Zhang       idx0 = idx;
33858c24d83SHong Zhang       idx  = lnk[idx0];
33958c24d83SHong Zhang       *current_space->array++ = idx;
34058c24d83SHong Zhang       lnk[idx0] = -1;
34158c24d83SHong Zhang     }
34258c24d83SHong Zhang     lnk[idx] = -1;
34358c24d83SHong Zhang 
34458c24d83SHong Zhang     current_space->local_used      += cnzi;
34558c24d83SHong Zhang     current_space->local_remaining -= cnzi;
34658c24d83SHong Zhang 
34758c24d83SHong Zhang     ci[i+1] = ci[i] + cnzi;
34858c24d83SHong Zhang   }
34958c24d83SHong Zhang 
35058c24d83SHong Zhang   /* Column indices are in the list of free space */
35158c24d83SHong Zhang   /* Allocate space for cj, initialize cj, and */
35258c24d83SHong Zhang   /* destroy list of free space and other temporary array(s) */
35358c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr);
35458c24d83SHong Zhang   ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr);
35558c24d83SHong Zhang   ierr = PetscFree(lnk);CHKERRQ(ierr);
35658c24d83SHong Zhang 
35758c24d83SHong Zhang   /* Allocate space for ca */
35858c24d83SHong Zhang   ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr);
35958c24d83SHong Zhang   ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr);
36058c24d83SHong Zhang 
36158c24d83SHong Zhang   /* put together the new matrix */
36258c24d83SHong Zhang   ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr);
36358c24d83SHong Zhang 
36458c24d83SHong Zhang   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
36558c24d83SHong Zhang   /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */
36658c24d83SHong Zhang   c = (Mat_SeqAIJ *)((*C)->data);
36758c24d83SHong Zhang   c->freedata = PETSC_TRUE;
36858c24d83SHong Zhang   c->nonew    = 0;
36958c24d83SHong Zhang 
37058c24d83SHong Zhang   ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr);
37158c24d83SHong Zhang   PetscFunctionReturn(0);
37258c24d83SHong Zhang }
373d50806bdSBarry Smith 
374c1f4806aSKris Buschelman #undef __FUNCT__
375c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric"
3765c66b693SKris Buschelman /*@
3775c66b693SKris Buschelman    MatMatMultNumeric - Performs the numeric matrix-matrix product.
3785c66b693SKris Buschelman    Call this routine after first calling MatMatMultSymbolic().
3795c66b693SKris Buschelman 
3805c66b693SKris Buschelman    Collective on Mat
3815c66b693SKris Buschelman 
3825c66b693SKris Buschelman    Input Parameters:
3835c66b693SKris Buschelman +  A - the left matrix
3845c66b693SKris Buschelman -  B - the right matrix
3855c66b693SKris Buschelman 
3865c66b693SKris Buschelman    Output Parameters:
3875c66b693SKris Buschelman .  C - the product matrix, whose ij structure was defined from MatMatMultSymbolic().
3885c66b693SKris Buschelman 
3895c66b693SKris Buschelman    Notes:
3905c66b693SKris Buschelman    C must have been created with MatMatMultSymbolic.
3915c66b693SKris Buschelman 
3925c66b693SKris Buschelman    This routine is currently only implemented for SeqAIJ type matrices.
3935c66b693SKris Buschelman 
3945c66b693SKris Buschelman    Level: intermediate
3955c66b693SKris Buschelman 
3965c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic()
3975c66b693SKris Buschelman @*/
3985c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){
3995c66b693SKris Buschelman   /* Perhaps this "interface" routine should be moved into the interface directory.*/
4005c66b693SKris Buschelman   /* To facilitate implementations with varying types, QueryFunction is used.*/
4015c66b693SKris Buschelman   /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */
4025c66b693SKris Buschelman   int ierr;
4034d3841fdSKris Buschelman   char numfunct[80];
4045c66b693SKris Buschelman   int (*numeric)(Mat,Mat,Mat);
4055c66b693SKris Buschelman 
4065c66b693SKris Buschelman   PetscFunctionBegin;
4075c66b693SKris Buschelman 
4084482741eSBarry Smith   PetscValidHeaderSpecific(A,MAT_COOKIE,1);
409c9780b6fSBarry Smith   PetscValidType(A,1);
4105c66b693SKris Buschelman   MatPreallocated(A);
4115c66b693SKris Buschelman   if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4125c66b693SKris Buschelman   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4135c66b693SKris Buschelman 
4144482741eSBarry Smith   PetscValidHeaderSpecific(B,MAT_COOKIE,2);
415c9780b6fSBarry Smith   PetscValidType(B,2);
4165c66b693SKris Buschelman   MatPreallocated(B);
4175c66b693SKris Buschelman   if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4185c66b693SKris Buschelman   if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4195c66b693SKris Buschelman 
4204482741eSBarry Smith   PetscValidHeaderSpecific(C,MAT_COOKIE,3);
421c9780b6fSBarry Smith   PetscValidType(C,3);
4225c66b693SKris Buschelman   MatPreallocated(C);
4235c66b693SKris Buschelman   if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
4245c66b693SKris Buschelman   if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4255c66b693SKris Buschelman 
4265c66b693SKris Buschelman   if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N);
4275c66b693SKris Buschelman   if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N);
4285c66b693SKris Buschelman   if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M);
4295c66b693SKris Buschelman 
4304d3841fdSKris Buschelman   /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */
4314d3841fdSKris Buschelman   /* When other implementations exist, attack the multiple dispatch problem. */
4324d3841fdSKris Buschelman   ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr);
4334d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4344d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name);
4354d3841fdSKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr);
4364d3841fdSKris Buschelman   if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name);
4374d3841fdSKris Buschelman 
4385c66b693SKris Buschelman   ierr = (*numeric)(A,B,C);CHKERRQ(ierr);
4395c66b693SKris Buschelman 
4405c66b693SKris Buschelman   PetscFunctionReturn(0);
4415c66b693SKris Buschelman }
4425c66b693SKris Buschelman 
443d50806bdSBarry Smith #undef __FUNCT__
44494e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ"
44594e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C)
446d50806bdSBarry Smith {
44794e3eecaSKris Buschelman   int        ierr,flops=0;
448d50806bdSBarry Smith   Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
449d50806bdSBarry Smith   Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
450d50806bdSBarry Smith   Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data;
451d50806bdSBarry Smith   int        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j;
4525c66b693SKris Buschelman   int        am=A->M,cn=C->N;
45394e3eecaSKris Buschelman   int        i,j,k,anzi,bnzi,cnzi,brow;
454d50806bdSBarry Smith   MatScalar  *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp;
455d50806bdSBarry Smith 
456d50806bdSBarry Smith   PetscFunctionBegin;
457d50806bdSBarry Smith 
4585c66b693SKris Buschelman   /* Start timers */
459d50806bdSBarry Smith   ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
46094e3eecaSKris Buschelman 
461d50806bdSBarry Smith   /* Allocate temp accumulation space to avoid searching for nonzero columns in C */
462d50806bdSBarry Smith   ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr);
463d50806bdSBarry Smith   ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr);
464d50806bdSBarry Smith   /* Traverse A row-wise. */
465d50806bdSBarry Smith   /* Build the ith row in C by summing over nonzero columns in A, */
466d50806bdSBarry Smith   /* the rows of B corresponding to nonzeros of A. */
467d50806bdSBarry Smith   for (i=0;i<am;i++) {
468d50806bdSBarry Smith     anzi = ai[i+1] - ai[i];
469d50806bdSBarry Smith     for (j=0;j<anzi;j++) {
470d50806bdSBarry Smith       brow = *aj++;
471d50806bdSBarry Smith       bnzi = bi[brow+1] - bi[brow];
472d50806bdSBarry Smith       bjj  = bj + bi[brow];
473d50806bdSBarry Smith       baj  = ba + bi[brow];
474d50806bdSBarry Smith       for (k=0;k<bnzi;k++) {
475d50806bdSBarry Smith         temp[bjj[k]] += (*aa)*baj[k];
476d50806bdSBarry Smith       }
477d50806bdSBarry Smith       flops += 2*bnzi;
478d50806bdSBarry Smith       aa++;
479d50806bdSBarry Smith     }
480d50806bdSBarry Smith     /* Store row back into C, and re-zero temp */
481d50806bdSBarry Smith     cnzi = ci[i+1] - ci[i];
482d50806bdSBarry Smith     for (j=0;j<cnzi;j++) {
483d50806bdSBarry Smith       ca[j] = temp[cj[j]];
484d50806bdSBarry Smith       temp[cj[j]] = 0.0;
485d50806bdSBarry Smith     }
486d50806bdSBarry Smith     ca += cnzi;
487d50806bdSBarry Smith     cj += cnzi;
488d50806bdSBarry Smith   }
489716bacf3SKris Buschelman   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
490716bacf3SKris Buschelman   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
491716bacf3SKris Buschelman 
492d50806bdSBarry Smith   /* Free temp */
493d50806bdSBarry Smith   ierr = PetscFree(temp);CHKERRQ(ierr);
494d50806bdSBarry Smith   ierr = PetscLogFlops(flops);CHKERRQ(ierr);
495d50806bdSBarry Smith   ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr);
496d50806bdSBarry Smith   PetscFunctionReturn(0);
497d50806bdSBarry Smith }
498d50806bdSBarry Smith 
499d50806bdSBarry Smith #undef __FUNCT__
5005c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private"
5015c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) {
502d50806bdSBarry Smith   int ierr;
503d50806bdSBarry Smith 
504d50806bdSBarry Smith   PetscFunctionBegin;
505*1c741599SHong Zhang #ifndef OLD
5062216b3a4SKris Buschelman   if (!logkey_matmatmult) {
5072216b3a4SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr);
5082216b3a4SKris Buschelman   }
5096284ec50SHong Zhang #endif
5105c66b693SKris Buschelman   if (!logkey_matmatmult_symbolic) {
5115c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr);
512d50806bdSBarry Smith   }
5135c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij",
5145c66b693SKris Buschelman                                            "MatMatMult_Symbolic_SeqAIJ_SeqAIJ",
5155c66b693SKris Buschelman                                            MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
5165c66b693SKris Buschelman   if (!logkey_matmatmult_numeric) {
5175c66b693SKris Buschelman     ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr);
51894e3eecaSKris Buschelman   }
5195c66b693SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij",
5205c66b693SKris Buschelman                                            "MatMatMult_Numeric_SeqAIJ_SeqAIJ",
5215c66b693SKris Buschelman                                            MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr);
52294e3eecaSKris Buschelman   PetscFunctionReturn(0);
52394e3eecaSKris Buschelman }
524