1d50806bdSBarry Smith /* 22c9ce0e5SKris Buschelman Defines matrix-matrix product routines for pairs of SeqAIJ matrices 3d50806bdSBarry Smith C = A * B 4d50806bdSBarry Smith */ 5d50806bdSBarry Smith 6c1f4806aSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" /*I "petscmat.h" I*/ 770f19b1fSKris Buschelman #include "src/mat/utils/freespace.h" 8d50806bdSBarry Smith 9*1c239cc6SHong Zhang /* 10*1c239cc6SHong Zhang Add a index set into a sorted linked list 11*1c239cc6SHong Zhang input: 12*1c239cc6SHong Zhang nidx - number of input indices 13*1c239cc6SHong Zhang indices - interger array 14*1c239cc6SHong Zhang idx_head - the header of the list 15*1c239cc6SHong Zhang idx_unset - the value indicating the entry in the list is not set yet 16*1c239cc6SHong Zhang lnk - linked list(an integer array) that is created 17*1c239cc6SHong Zhang output: 18*1c239cc6SHong Zhang nlnk - number of newly added indices 19*1c239cc6SHong Zhang lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices 20*1c239cc6SHong Zhang */ 21*1c239cc6SHong Zhang #undef __FUNCT__ 22*1c239cc6SHong Zhang #define __FUNCT__ "LnklistAdd" 23*1c239cc6SHong Zhang int LnklistAdd(int nidx,int *indices,int idx_head,int idx_unset,int *nlnk,int *lnk) 24*1c239cc6SHong Zhang { 25*1c239cc6SHong Zhang int i,idx,lidx,entry,n; 26*1c239cc6SHong Zhang 27*1c239cc6SHong Zhang PetscFunctionBegin; 28*1c239cc6SHong Zhang n = 0; 29*1c239cc6SHong Zhang lidx = idx_head; 30*1c239cc6SHong Zhang i = nidx; 31*1c239cc6SHong Zhang while (i){ 32*1c239cc6SHong Zhang /* assume indices is almost in increasing order, starting from its end saves computation */ 33*1c239cc6SHong Zhang entry = indices[--i]; 34*1c239cc6SHong Zhang if (lnk[entry] == idx_unset) { /* new entry */ 35*1c239cc6SHong Zhang do { 36*1c239cc6SHong Zhang idx = lidx; 37*1c239cc6SHong Zhang lidx = lnk[idx]; 38*1c239cc6SHong Zhang } while (entry > lidx); 39*1c239cc6SHong Zhang lnk[idx] = entry; 40*1c239cc6SHong Zhang lnk[entry] = lidx; 41*1c239cc6SHong Zhang n++; 42*1c239cc6SHong Zhang } 43*1c239cc6SHong Zhang } 44*1c239cc6SHong Zhang *nlnk = n; 45*1c239cc6SHong Zhang PetscFunctionReturn(0); 46*1c239cc6SHong Zhang } 47*1c239cc6SHong Zhang 48*1c239cc6SHong Zhang 492216b3a4SKris Buschelman static int logkey_matmatmult = 0; 502216b3a4SKris Buschelman static int logkey_matmatmult_symbolic = 0; 512216b3a4SKris Buschelman static int logkey_matmatmult_numeric = 0; 522216b3a4SKris Buschelman 53c1f4806aSKris Buschelman #undef __FUNCT__ 54c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMult" 555c66b693SKris Buschelman /*@ 565c66b693SKris Buschelman MatMatMult - Performs Matrix-Matrix Multiplication C=A*B. 5794e3eecaSKris Buschelman 585c66b693SKris Buschelman Collective on Mat 59d50806bdSBarry Smith 605c66b693SKris Buschelman Input Parameters: 615c66b693SKris Buschelman + A - the left matrix 625c66b693SKris Buschelman - B - the right matrix 635c66b693SKris Buschelman 645c66b693SKris Buschelman Output Parameters: 655c66b693SKris Buschelman . C - the product matrix 665c66b693SKris Buschelman 675c66b693SKris Buschelman Notes: 685c66b693SKris Buschelman C will be created and must be destroyed by the user with MatDestroy(). 695c66b693SKris Buschelman 704d3841fdSKris Buschelman This routine is currently only implemented for pairs of SeqAIJ matrices and classes 714d3841fdSKris Buschelman which inherit from SeqAIJ. C will be of type MATSEQAIJ. 725c66b693SKris Buschelman 735c66b693SKris Buschelman Level: intermediate 745c66b693SKris Buschelman 755c66b693SKris Buschelman .seealso: MatMatMultSymbolic(),MatMatMultNumeric() 765c66b693SKris Buschelman @*/ 775c66b693SKris Buschelman int MatMatMult(Mat A,Mat B, Mat *C) { 785c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 795c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 805c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMult_<type of A><type of B>". */ 81d50806bdSBarry Smith int ierr; 825c66b693SKris Buschelman char funct[80]; 835c66b693SKris Buschelman int (*mult)(Mat,Mat,Mat*); 84d50806bdSBarry Smith 85d50806bdSBarry Smith PetscFunctionBegin; 864482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 87c9780b6fSBarry Smith PetscValidType(A,1); 885c66b693SKris Buschelman MatPreallocated(A); 895c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 905c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 91d50806bdSBarry Smith 924482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 93c9780b6fSBarry Smith PetscValidType(B,2); 945c66b693SKris Buschelman MatPreallocated(B); 955c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 965c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 97d50806bdSBarry Smith 984482741eSBarry Smith PetscValidPointer(C,3); 994482741eSBarry Smith 1005c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 101d50806bdSBarry Smith 1024d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 1034d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 1044d3841fdSKris Buschelman ierr = PetscStrcpy(funct,"MatMatMult_seqaijseqaij");CHKERRQ(ierr); 1054d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr); 1064d3841fdSKris Buschelman if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 1075c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,funct,(PetscVoidFunction)&mult);CHKERRQ(ierr); 1084d3841fdSKris Buschelman if (!mult) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 1094d3841fdSKris Buschelman 1105c66b693SKris Buschelman ierr = (*mult)(A,B,C);CHKERRQ(ierr); 111d50806bdSBarry Smith PetscFunctionReturn(0); 112d50806bdSBarry Smith } 1135c66b693SKris Buschelman 1145c66b693SKris Buschelman #undef __FUNCT__ 1155c66b693SKris Buschelman #define __FUNCT__ "MatMatMult_SeqAIJ_SeqAIJ" 1165c66b693SKris Buschelman int MatMatMult_SeqAIJ_SeqAIJ(Mat A,Mat B, Mat *C) { 1175c66b693SKris Buschelman int ierr; 1184d3841fdSKris Buschelman char symfunct[80],numfunct[80]; 1195c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat*),(*numeric)(Mat,Mat,Mat); 1205c66b693SKris Buschelman 1215c66b693SKris Buschelman PetscFunctionBegin; 1224d3841fdSKris Buschelman 1234d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 1244d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 1254d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 1264d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 1274d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 1285c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 1294d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 1304d3841fdSKris Buschelman 1314d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 1325c66b693SKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 1334d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 1344d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 1354d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 1364d3841fdSKris Buschelman 1375c66b693SKris Buschelman ierr = PetscLogEventBegin(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 1385c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 1395c66b693SKris Buschelman ierr = (*numeric)(A,B,*C);CHKERRQ(ierr); 1405c66b693SKris Buschelman ierr = PetscLogEventEnd(logkey_matmatmult,A,B,0,0);CHKERRQ(ierr); 1415c66b693SKris Buschelman PetscFunctionReturn(0); 1425c66b693SKris Buschelman } 1435c66b693SKris Buschelman 144c1f4806aSKris Buschelman #undef __FUNCT__ 145c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultSymbolic" 1465c66b693SKris Buschelman /*@ 1475c66b693SKris Buschelman MatMatMultSymbolic - Performs construction, preallocation, and computes the ij structure 1485c66b693SKris Buschelman of the matrix-matrix product C=A*B. Call this routine before calling MatMatMultNumeric(). 1495c66b693SKris Buschelman 1505c66b693SKris Buschelman Collective on Mat 1515c66b693SKris Buschelman 1525c66b693SKris Buschelman Input Parameters: 1535c66b693SKris Buschelman + A - the left matrix 1545c66b693SKris Buschelman - B - the right matrix 1555c66b693SKris Buschelman 1565c66b693SKris Buschelman Output Parameters: 1575c66b693SKris Buschelman . C - the matrix containing the ij structure of product matrix 1585c66b693SKris Buschelman 1595c66b693SKris Buschelman Notes: 1604d3841fdSKris Buschelman C will be created as a MATSEQAIJ matrix and must be destroyed by the user with MatDestroy(). 1615c66b693SKris Buschelman 1624d3841fdSKris Buschelman This routine is currently only implemented for SeqAIJ matrices and classes which inherit from SeqAIJ. 1635c66b693SKris Buschelman 1645c66b693SKris Buschelman Level: intermediate 1655c66b693SKris Buschelman 1665c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultNumeric() 1675c66b693SKris Buschelman @*/ 1685c66b693SKris Buschelman int MatMatMultSymbolic(Mat A,Mat B,Mat *C) { 1695c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 1705c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 1715c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultSymbolic_<type of A><type of B>". */ 1725c66b693SKris Buschelman int ierr; 1734d3841fdSKris Buschelman char symfunct[80]; 1745c66b693SKris Buschelman int (*symbolic)(Mat,Mat,Mat *); 1755c66b693SKris Buschelman 1765c66b693SKris Buschelman PetscFunctionBegin; 1774482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 178c9780b6fSBarry Smith PetscValidType(A,1); 1795c66b693SKris Buschelman MatPreallocated(A); 1805c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1815c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1825c66b693SKris Buschelman 1834482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 184c9780b6fSBarry Smith PetscValidType(B,2); 1855c66b693SKris Buschelman MatPreallocated(B); 1865c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 1875c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1884482741eSBarry Smith PetscValidPointer(C,3); 1894482741eSBarry Smith 1905c66b693SKris Buschelman 1915c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 1925c66b693SKris Buschelman 1934d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 1944d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 1954d3841fdSKris Buschelman ierr = PetscStrcpy(symfunct,"MatMatMultSymbolic_seqaijseqaij");CHKERRQ(ierr); 1964d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 1974d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 1984d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,symfunct,(PetscVoidFunction)&symbolic);CHKERRQ(ierr); 1994d3841fdSKris Buschelman if (!symbolic) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 2005c66b693SKris Buschelman ierr = (*symbolic)(A,B,C);CHKERRQ(ierr); 2015c66b693SKris Buschelman 2025c66b693SKris Buschelman PetscFunctionReturn(0); 2035c66b693SKris Buschelman } 20458c24d83SHong Zhang 20558c24d83SHong Zhang #undef __FUNCT__ 20658c24d83SHong Zhang #define __FUNCT__ "MatMatMult_Symbolic_SeqAIJ_SeqAIJ" 20758c24d83SHong Zhang int MatMatMult_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat *C) 20858c24d83SHong Zhang { 20958c24d83SHong Zhang int ierr; 21058c24d83SHong Zhang FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 21158c24d83SHong Zhang Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data,*c; 21258c24d83SHong Zhang int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj; 213*1c239cc6SHong Zhang int *ci,*cj,*lnk,idx0,idx; 2145c66b693SKris Buschelman int am=A->M,bn=B->N,bm=B->M; 215*1c239cc6SHong Zhang int i,j,k,anzi,brow,bnzj,cnzi,nlnk; 21658c24d83SHong Zhang MatScalar *ca; 21758c24d83SHong Zhang 21858c24d83SHong Zhang PetscFunctionBegin; 2195c66b693SKris Buschelman /* Start timers */ 22058c24d83SHong Zhang ierr = PetscLogEventBegin(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 22158c24d83SHong Zhang 22258c24d83SHong Zhang /* Set up */ 22358c24d83SHong Zhang /* Allocate ci array, arrays for fill computation and */ 22458c24d83SHong Zhang /* free space for accumulating nonzero column info */ 22558c24d83SHong Zhang ierr = PetscMalloc(((am+1)+1)*sizeof(int),&ci);CHKERRQ(ierr); 22658c24d83SHong Zhang ci[0] = 0; 22758c24d83SHong Zhang 22858c24d83SHong Zhang ierr = PetscMalloc((bn+1)*sizeof(int),&lnk);CHKERRQ(ierr); 22958c24d83SHong Zhang for (i=0; i<bn; i++) lnk[i] = -1; 23058c24d83SHong Zhang 23158c24d83SHong Zhang /* Initial FreeSpace size is nnz(B)=4*bi[bm] */ 23258c24d83SHong Zhang ierr = GetMoreSpace(4*bi[bm],&free_space);CHKERRQ(ierr); 23358c24d83SHong Zhang current_space = free_space; 23458c24d83SHong Zhang 23558c24d83SHong Zhang /* Determine symbolic info for each row of the product: */ 23658c24d83SHong Zhang for (i=0;i<am;i++) { 23758c24d83SHong Zhang anzi = ai[i+1] - ai[i]; 23858c24d83SHong Zhang cnzi = 0; 23958c24d83SHong Zhang lnk[bn] = bn; 24058c24d83SHong Zhang for (j=0;j<anzi;j++) { 24158c24d83SHong Zhang brow = *aj++; 24258c24d83SHong Zhang bnzj = bi[brow+1] - bi[brow]; 24358c24d83SHong Zhang bjj = bj + bi[brow]; 244*1c239cc6SHong Zhang /* add non-zero cols of B into the sorted linked list lnk */ 245*1c239cc6SHong Zhang ierr = LnklistAdd(bnzj,bjj,bn,-1,&nlnk,lnk);CHKERRQ(ierr); 246*1c239cc6SHong Zhang cnzi += nlnk; 24758c24d83SHong Zhang } 24858c24d83SHong Zhang 24958c24d83SHong Zhang /* If free space is not available, make more free space */ 25058c24d83SHong Zhang /* Double the amount of total space in the list */ 25158c24d83SHong Zhang if (current_space->local_remaining<cnzi) { 25258c24d83SHong Zhang printf("...%d -th row, double space ...\n",i); 25358c24d83SHong Zhang ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 25458c24d83SHong Zhang } 25558c24d83SHong Zhang 25658c24d83SHong Zhang /* Copy data into free space, and zero out denserow and lnk */ 25758c24d83SHong Zhang idx = bn; 25858c24d83SHong Zhang for (j=0; j<cnzi; j++){ 25958c24d83SHong Zhang idx0 = idx; 26058c24d83SHong Zhang idx = lnk[idx0]; 26158c24d83SHong Zhang *current_space->array++ = idx; 26258c24d83SHong Zhang lnk[idx0] = -1; 26358c24d83SHong Zhang } 26458c24d83SHong Zhang lnk[idx] = -1; 26558c24d83SHong Zhang 26658c24d83SHong Zhang current_space->local_used += cnzi; 26758c24d83SHong Zhang current_space->local_remaining -= cnzi; 26858c24d83SHong Zhang 26958c24d83SHong Zhang ci[i+1] = ci[i] + cnzi; 27058c24d83SHong Zhang } 27158c24d83SHong Zhang 27258c24d83SHong Zhang /* Column indices are in the list of free space */ 27358c24d83SHong Zhang /* Allocate space for cj, initialize cj, and */ 27458c24d83SHong Zhang /* destroy list of free space and other temporary array(s) */ 27558c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(int),&cj);CHKERRQ(ierr); 27658c24d83SHong Zhang ierr = MakeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 27758c24d83SHong Zhang ierr = PetscFree(lnk);CHKERRQ(ierr); 27858c24d83SHong Zhang 27958c24d83SHong Zhang /* Allocate space for ca */ 28058c24d83SHong Zhang ierr = PetscMalloc((ci[am]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 28158c24d83SHong Zhang ierr = PetscMemzero(ca,(ci[am]+1)*sizeof(MatScalar));CHKERRQ(ierr); 28258c24d83SHong Zhang 28358c24d83SHong Zhang /* put together the new matrix */ 28458c24d83SHong Zhang ierr = MatCreateSeqAIJWithArrays(A->comm,am,bn,ci,cj,ca,C);CHKERRQ(ierr); 28558c24d83SHong Zhang 28658c24d83SHong Zhang /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 28758c24d83SHong Zhang /* These are PETSc arrays, so change flags so arrays can be deleted by PETSc */ 28858c24d83SHong Zhang c = (Mat_SeqAIJ *)((*C)->data); 28958c24d83SHong Zhang c->freedata = PETSC_TRUE; 29058c24d83SHong Zhang c->nonew = 0; 29158c24d83SHong Zhang 29258c24d83SHong Zhang ierr = PetscLogEventEnd(logkey_matmatmult_symbolic,A,B,0,0);CHKERRQ(ierr); 29358c24d83SHong Zhang PetscFunctionReturn(0); 29458c24d83SHong Zhang } 295d50806bdSBarry Smith 296c1f4806aSKris Buschelman #undef __FUNCT__ 297c1f4806aSKris Buschelman #define __FUNCT__ "MatMatMultNumeric" 2985c66b693SKris Buschelman /*@ 2995c66b693SKris Buschelman MatMatMultNumeric - Performs the numeric matrix-matrix product. 3005c66b693SKris Buschelman Call this routine after first calling MatMatMultSymbolic(). 3015c66b693SKris Buschelman 3025c66b693SKris Buschelman Collective on Mat 3035c66b693SKris Buschelman 3045c66b693SKris Buschelman Input Parameters: 3055c66b693SKris Buschelman + A - the left matrix 3065c66b693SKris Buschelman - B - the right matrix 3075c66b693SKris Buschelman 3085c66b693SKris Buschelman Output Parameters: 3095c66b693SKris Buschelman . C - the product matrix, whose ij structure was defined from MatMatMultSymbolic(). 3105c66b693SKris Buschelman 3115c66b693SKris Buschelman Notes: 3125c66b693SKris Buschelman C must have been created with MatMatMultSymbolic. 3135c66b693SKris Buschelman 3145c66b693SKris Buschelman This routine is currently only implemented for SeqAIJ type matrices. 3155c66b693SKris Buschelman 3165c66b693SKris Buschelman Level: intermediate 3175c66b693SKris Buschelman 3185c66b693SKris Buschelman .seealso: MatMatMult(),MatMatMultSymbolic() 3195c66b693SKris Buschelman @*/ 3205c66b693SKris Buschelman int MatMatMultNumeric(Mat A,Mat B,Mat C){ 3215c66b693SKris Buschelman /* Perhaps this "interface" routine should be moved into the interface directory.*/ 3225c66b693SKris Buschelman /* To facilitate implementations with varying types, QueryFunction is used.*/ 3235c66b693SKris Buschelman /* It is assumed that implementations will be composed as "MatMatMultNumeric_<type of A><type of B>". */ 3245c66b693SKris Buschelman int ierr; 3254d3841fdSKris Buschelman char numfunct[80]; 3265c66b693SKris Buschelman int (*numeric)(Mat,Mat,Mat); 3275c66b693SKris Buschelman 3285c66b693SKris Buschelman PetscFunctionBegin; 3295c66b693SKris Buschelman 3304482741eSBarry Smith PetscValidHeaderSpecific(A,MAT_COOKIE,1); 331c9780b6fSBarry Smith PetscValidType(A,1); 3325c66b693SKris Buschelman MatPreallocated(A); 3335c66b693SKris Buschelman if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3345c66b693SKris Buschelman if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3355c66b693SKris Buschelman 3364482741eSBarry Smith PetscValidHeaderSpecific(B,MAT_COOKIE,2); 337c9780b6fSBarry Smith PetscValidType(B,2); 3385c66b693SKris Buschelman MatPreallocated(B); 3395c66b693SKris Buschelman if (!B->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3405c66b693SKris Buschelman if (B->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3415c66b693SKris Buschelman 3424482741eSBarry Smith PetscValidHeaderSpecific(C,MAT_COOKIE,3); 343c9780b6fSBarry Smith PetscValidType(C,3); 3445c66b693SKris Buschelman MatPreallocated(C); 3455c66b693SKris Buschelman if (!C->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3465c66b693SKris Buschelman if (C->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3475c66b693SKris Buschelman 3485c66b693SKris Buschelman if (B->N!=C->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->N,C->N); 3495c66b693SKris Buschelman if (B->M!=A->N) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",B->M,A->N); 3505c66b693SKris Buschelman if (A->M!=C->M) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %d != %d",A->M,C->M); 3515c66b693SKris Buschelman 3524d3841fdSKris Buschelman /* Currently only _seqaijseqaij is implemented, so just query for it in A and B. */ 3534d3841fdSKris Buschelman /* When other implementations exist, attack the multiple dispatch problem. */ 3544d3841fdSKris Buschelman ierr = PetscStrcpy(numfunct,"MatMatMultNumeric_seqaijseqaij");CHKERRQ(ierr); 3554d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)A,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 3564d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for A of type %s",A->type_name); 3574d3841fdSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,numfunct,(PetscVoidFunction)&numeric);CHKERRQ(ierr); 3584d3841fdSKris Buschelman if (!numeric) SETERRQ1(PETSC_ERR_SUP,"C=A*B not implemented for B of type %s",B->type_name); 3594d3841fdSKris Buschelman 3605c66b693SKris Buschelman ierr = (*numeric)(A,B,C);CHKERRQ(ierr); 3615c66b693SKris Buschelman 3625c66b693SKris Buschelman PetscFunctionReturn(0); 3635c66b693SKris Buschelman } 3645c66b693SKris Buschelman 365d50806bdSBarry Smith #undef __FUNCT__ 36694e3eecaSKris Buschelman #define __FUNCT__ "MatMatMult_Numeric_SeqAIJ_SeqAIJ" 36794e3eecaSKris Buschelman int MatMatMult_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat B,Mat C) 368d50806bdSBarry Smith { 36994e3eecaSKris Buschelman int ierr,flops=0; 370d50806bdSBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 371d50806bdSBarry Smith Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 372d50806bdSBarry Smith Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 373d50806bdSBarry Smith int *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bjj,*ci=c->i,*cj=c->j; 3745c66b693SKris Buschelman int am=A->M,cn=C->N; 37594e3eecaSKris Buschelman int i,j,k,anzi,bnzi,cnzi,brow; 376d50806bdSBarry Smith MatScalar *aa=a->a,*ba=b->a,*baj,*ca=c->a,*temp; 377d50806bdSBarry Smith 378d50806bdSBarry Smith PetscFunctionBegin; 379d50806bdSBarry Smith 3805c66b693SKris Buschelman /* Start timers */ 381d50806bdSBarry Smith ierr = PetscLogEventBegin(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 38294e3eecaSKris Buschelman 383d50806bdSBarry Smith /* Allocate temp accumulation space to avoid searching for nonzero columns in C */ 384d50806bdSBarry Smith ierr = PetscMalloc((cn+1)*sizeof(MatScalar),&temp);CHKERRQ(ierr); 385d50806bdSBarry Smith ierr = PetscMemzero(temp,cn*sizeof(MatScalar));CHKERRQ(ierr); 386d50806bdSBarry Smith /* Traverse A row-wise. */ 387d50806bdSBarry Smith /* Build the ith row in C by summing over nonzero columns in A, */ 388d50806bdSBarry Smith /* the rows of B corresponding to nonzeros of A. */ 389d50806bdSBarry Smith for (i=0;i<am;i++) { 390d50806bdSBarry Smith anzi = ai[i+1] - ai[i]; 391d50806bdSBarry Smith for (j=0;j<anzi;j++) { 392d50806bdSBarry Smith brow = *aj++; 393d50806bdSBarry Smith bnzi = bi[brow+1] - bi[brow]; 394d50806bdSBarry Smith bjj = bj + bi[brow]; 395d50806bdSBarry Smith baj = ba + bi[brow]; 396d50806bdSBarry Smith for (k=0;k<bnzi;k++) { 397d50806bdSBarry Smith temp[bjj[k]] += (*aa)*baj[k]; 398d50806bdSBarry Smith } 399d50806bdSBarry Smith flops += 2*bnzi; 400d50806bdSBarry Smith aa++; 401d50806bdSBarry Smith } 402d50806bdSBarry Smith /* Store row back into C, and re-zero temp */ 403d50806bdSBarry Smith cnzi = ci[i+1] - ci[i]; 404d50806bdSBarry Smith for (j=0;j<cnzi;j++) { 405d50806bdSBarry Smith ca[j] = temp[cj[j]]; 406d50806bdSBarry Smith temp[cj[j]] = 0.0; 407d50806bdSBarry Smith } 408d50806bdSBarry Smith ca += cnzi; 409d50806bdSBarry Smith cj += cnzi; 410d50806bdSBarry Smith } 411716bacf3SKris Buschelman ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412716bacf3SKris Buschelman ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 413716bacf3SKris Buschelman 414d50806bdSBarry Smith /* Free temp */ 415d50806bdSBarry Smith ierr = PetscFree(temp);CHKERRQ(ierr); 416d50806bdSBarry Smith ierr = PetscLogFlops(flops);CHKERRQ(ierr); 417d50806bdSBarry Smith ierr = PetscLogEventEnd(logkey_matmatmult_numeric,A,B,C,0);CHKERRQ(ierr); 418d50806bdSBarry Smith PetscFunctionReturn(0); 419d50806bdSBarry Smith } 420d50806bdSBarry Smith 421d50806bdSBarry Smith #undef __FUNCT__ 4225c66b693SKris Buschelman #define __FUNCT__ "RegisterMatMatMultRoutines_Private" 4235c66b693SKris Buschelman int RegisterMatMatMultRoutines_Private(Mat A) { 424d50806bdSBarry Smith int ierr; 425d50806bdSBarry Smith 426d50806bdSBarry Smith PetscFunctionBegin; 4272216b3a4SKris Buschelman if (!logkey_matmatmult) { 4282216b3a4SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult,"MatMatMult",MAT_COOKIE);CHKERRQ(ierr); 4292216b3a4SKris Buschelman } 4305c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMult_seqaijseqaij", 4315c66b693SKris Buschelman "MatMatMult_SeqAIJ_SeqAIJ", 4325c66b693SKris Buschelman MatMatMult_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 4335c66b693SKris Buschelman if (!logkey_matmatmult_symbolic) { 4345c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_symbolic,"MatMatMult_Symbolic",MAT_COOKIE);CHKERRQ(ierr); 435d50806bdSBarry Smith } 4365c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultSymbolic_seqaijseqaij", 4375c66b693SKris Buschelman "MatMatMult_Symbolic_SeqAIJ_SeqAIJ", 4385c66b693SKris Buschelman MatMatMult_Symbolic_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 4395c66b693SKris Buschelman if (!logkey_matmatmult_numeric) { 4405c66b693SKris Buschelman ierr = PetscLogEventRegister(&logkey_matmatmult_numeric,"MatMatMult_Numeric",MAT_COOKIE);CHKERRQ(ierr); 44194e3eecaSKris Buschelman } 4425c66b693SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMatMultNumeric_seqaijseqaij", 4435c66b693SKris Buschelman "MatMatMult_Numeric_SeqAIJ_SeqAIJ", 4445c66b693SKris Buschelman MatMatMult_Numeric_SeqAIJ_SeqAIJ);CHKERRQ(ierr); 44594e3eecaSKris Buschelman PetscFunctionReturn(0); 44694e3eecaSKris Buschelman } 447