1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 8baa3c1c6SHong Zhang #include <petscblaslapack.h> 98965ea79SLois Curfman McInnes 10ab92ecdeSBarry Smith /*@ 11ab92ecdeSBarry Smith 12ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 16ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 23ab92ecdeSBarry Smith @*/ 24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25ab92ecdeSBarry Smith { 26ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27ab92ecdeSBarry Smith PetscErrorCode ierr; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32d5ea218eSStefano Zampini PetscValidPointer(B,2); 33251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 342205254eSKarl Rupp if (flg) *B = mat->A; 352205254eSKarl Rupp else *B = A; 36ab92ecdeSBarry Smith PetscFunctionReturn(0); 37ab92ecdeSBarry Smith } 38ab92ecdeSBarry Smith 39ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40ba8c8a56SBarry Smith { 41ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42ba8c8a56SBarry Smith PetscErrorCode ierr; 43d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44ba8c8a56SBarry Smith 45ba8c8a56SBarry Smith PetscFunctionBegin; 46e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47ba8c8a56SBarry Smith lrow = row - rstart; 48ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49ba8c8a56SBarry Smith PetscFunctionReturn(0); 50ba8c8a56SBarry Smith } 51ba8c8a56SBarry Smith 52637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53ba8c8a56SBarry Smith { 54637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55ba8c8a56SBarry Smith PetscErrorCode ierr; 56637a0070SStefano Zampini PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57ba8c8a56SBarry Smith 58ba8c8a56SBarry Smith PetscFunctionBegin; 59637a0070SStefano Zampini if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60637a0070SStefano Zampini lrow = row - rstart; 61637a0070SStefano Zampini ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62ba8c8a56SBarry Smith PetscFunctionReturn(0); 63ba8c8a56SBarry Smith } 64ba8c8a56SBarry Smith 6511bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 660de54da6SSatish Balay { 670de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 686849ba73SBarry Smith PetscErrorCode ierr; 69d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 7087828ca2SBarry Smith PetscScalar *array; 710de54da6SSatish Balay MPI_Comm comm; 724b6ae80fSStefano Zampini PetscBool flg; 7311bd1e4dSLisandro Dalcin Mat B; 740de54da6SSatish Balay 750de54da6SSatish Balay PetscFunctionBegin; 764b6ae80fSStefano Zampini ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 774b6ae80fSStefano Zampini if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 7811bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79*616b8fbbSStefano Zampini if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 804b6ae80fSStefano Zampini 814b6ae80fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 824b6ae80fSStefano Zampini if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 830de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 8411bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 8611bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 874b6ae80fSStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 8811bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 894b6ae80fSStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 9011bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9111bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9211bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 9311bd1e4dSLisandro Dalcin *a = B; 9411bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 952205254eSKarl Rupp } else *a = B; 960de54da6SSatish Balay PetscFunctionReturn(0); 970de54da6SSatish Balay } 980de54da6SSatish Balay 9913f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 1008965ea79SLois Curfman McInnes { 10139b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102dfbe8321SBarry Smith PetscErrorCode ierr; 103d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1058965ea79SLois Curfman McInnes 1063a40ed3dSBarry Smith PetscFunctionBegin; 1078965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1085ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 109e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1108965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1118965ea79SLois Curfman McInnes row = idxm[i] - rstart; 11239b7565bSBarry Smith if (roworiented) { 11339b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1143a40ed3dSBarry Smith } else { 1158965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1165ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 117e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11839b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11939b7565bSBarry Smith } 1208965ea79SLois Curfman McInnes } 1212205254eSKarl Rupp } else if (!A->donotstash) { 1225080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 12339b7565bSBarry Smith if (roworiented) { 124b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125d36fbae8SSatish Balay } else { 126b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 12739b7565bSBarry Smith } 128b49de8d1SLois Curfman McInnes } 129b49de8d1SLois Curfman McInnes } 1303a40ed3dSBarry Smith PetscFunctionReturn(0); 131b49de8d1SLois Curfman McInnes } 132b49de8d1SLois Curfman McInnes 13313f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134b49de8d1SLois Curfman McInnes { 135b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136dfbe8321SBarry Smith PetscErrorCode ierr; 137d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138b49de8d1SLois Curfman McInnes 1393a40ed3dSBarry Smith PetscFunctionBegin; 140b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 141e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 144b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 145b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 146e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149b49de8d1SLois Curfman McInnes } 150e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1518965ea79SLois Curfman McInnes } 1523a40ed3dSBarry Smith PetscFunctionReturn(0); 1538965ea79SLois Curfman McInnes } 1548965ea79SLois Curfman McInnes 15549a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 15649a6ff4bSBarry Smith { 15749a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 15849a6ff4bSBarry Smith PetscErrorCode ierr; 15949a6ff4bSBarry Smith 16049a6ff4bSBarry Smith PetscFunctionBegin; 16149a6ff4bSBarry Smith ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 16249a6ff4bSBarry Smith PetscFunctionReturn(0); 16349a6ff4bSBarry Smith } 16449a6ff4bSBarry Smith 165ad16ce7aSStefano Zampini static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166ad16ce7aSStefano Zampini { 167ad16ce7aSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168ad16ce7aSStefano Zampini PetscErrorCode ierr; 169ad16ce7aSStefano Zampini 170ad16ce7aSStefano Zampini PetscFunctionBegin; 171ad16ce7aSStefano Zampini ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172ad16ce7aSStefano Zampini PetscFunctionReturn(0); 173ad16ce7aSStefano Zampini } 174ad16ce7aSStefano Zampini 175637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176ff14e315SSatish Balay { 177ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178dfbe8321SBarry Smith PetscErrorCode ierr; 179ff14e315SSatish Balay 1803a40ed3dSBarry Smith PetscFunctionBegin; 181*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1828c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1833a40ed3dSBarry Smith PetscFunctionReturn(0); 184ff14e315SSatish Balay } 185ff14e315SSatish Balay 186637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 1878572280aSBarry Smith { 1888572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1898572280aSBarry Smith PetscErrorCode ierr; 1908572280aSBarry Smith 1918572280aSBarry Smith PetscFunctionBegin; 192*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1938572280aSBarry Smith ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 1948572280aSBarry Smith PetscFunctionReturn(0); 1958572280aSBarry Smith } 1968572280aSBarry Smith 1976947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 1986947451fSStefano Zampini { 1996947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2006947451fSStefano Zampini PetscErrorCode ierr; 2016947451fSStefano Zampini 2026947451fSStefano Zampini PetscFunctionBegin; 203*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 2046947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 2056947451fSStefano Zampini PetscFunctionReturn(0); 2066947451fSStefano Zampini } 2076947451fSStefano Zampini 208637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 209d3042a70SBarry Smith { 210d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211d3042a70SBarry Smith PetscErrorCode ierr; 212d3042a70SBarry Smith 213d3042a70SBarry Smith PetscFunctionBegin; 214*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 215*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 216d3042a70SBarry Smith ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 217d3042a70SBarry Smith PetscFunctionReturn(0); 218d3042a70SBarry Smith } 219d3042a70SBarry Smith 220d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 221d3042a70SBarry Smith { 222d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 223d3042a70SBarry Smith PetscErrorCode ierr; 224d3042a70SBarry Smith 225d3042a70SBarry Smith PetscFunctionBegin; 226*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 227*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 228d3042a70SBarry Smith ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 229d3042a70SBarry Smith PetscFunctionReturn(0); 230d3042a70SBarry Smith } 231d3042a70SBarry Smith 232d5ea218eSStefano Zampini static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 233d5ea218eSStefano Zampini { 234d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 235d5ea218eSStefano Zampini PetscErrorCode ierr; 236d5ea218eSStefano Zampini 237d5ea218eSStefano Zampini PetscFunctionBegin; 238*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 239*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 240d5ea218eSStefano Zampini ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 241d5ea218eSStefano Zampini PetscFunctionReturn(0); 242d5ea218eSStefano Zampini } 243d5ea218eSStefano Zampini 2447dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 245ca3fa75bSLois Curfman McInnes { 246ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 2476849ba73SBarry Smith PetscErrorCode ierr; 248637a0070SStefano Zampini PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 2495d0c19d7SBarry Smith const PetscInt *irow,*icol; 250637a0070SStefano Zampini const PetscScalar *v; 251637a0070SStefano Zampini PetscScalar *bv; 252ca3fa75bSLois Curfman McInnes Mat newmat; 2534aa3045dSJed Brown IS iscol_local; 25442a884f0SBarry Smith MPI_Comm comm_is,comm_mat; 255ca3fa75bSLois Curfman McInnes 256ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 25742a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 25842a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 25942a884f0SBarry Smith if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 26042a884f0SBarry Smith 2614aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 262ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2634aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 264b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 265b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2664aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 267ca3fa75bSLois Curfman McInnes 268ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2697eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2707eba5e9cSLois Curfman McInnes original matrix! */ 271ca3fa75bSLois Curfman McInnes 272ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 2737eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 274ca3fa75bSLois Curfman McInnes 275ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 276ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 277e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2787eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 279ca3fa75bSLois Curfman McInnes newmat = *B; 280ca3fa75bSLois Curfman McInnes } else { 281ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 282ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 2834aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 2847adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2850298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 286ca3fa75bSLois Curfman McInnes } 287ca3fa75bSLois Curfman McInnes 288ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 289ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 290637a0070SStefano Zampini ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 291637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 292637a0070SStefano Zampini ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 2934aa3045dSJed Brown for (i=0; i<Ncols; i++) { 294637a0070SStefano Zampini const PetscScalar *av = v + lda*icol[i]; 295ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2967eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 297ca3fa75bSLois Curfman McInnes } 298ca3fa75bSLois Curfman McInnes } 299637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 300637a0070SStefano Zampini ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 301ca3fa75bSLois Curfman McInnes 302ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 303ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 305ca3fa75bSLois Curfman McInnes 306ca3fa75bSLois Curfman McInnes /* Free work space */ 307ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 3085bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 30932bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 310ca3fa75bSLois Curfman McInnes *B = newmat; 311ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 312ca3fa75bSLois Curfman McInnes } 313ca3fa75bSLois Curfman McInnes 314637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 315ff14e315SSatish Balay { 31673a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 31773a71a0fSBarry Smith PetscErrorCode ierr; 31873a71a0fSBarry Smith 3193a40ed3dSBarry Smith PetscFunctionBegin; 3208c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 3213a40ed3dSBarry Smith PetscFunctionReturn(0); 322ff14e315SSatish Balay } 323ff14e315SSatish Balay 324637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 3258572280aSBarry Smith { 3268572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3278572280aSBarry Smith PetscErrorCode ierr; 3288572280aSBarry Smith 3298572280aSBarry Smith PetscFunctionBegin; 3308572280aSBarry Smith ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 3318572280aSBarry Smith PetscFunctionReturn(0); 3328572280aSBarry Smith } 3338572280aSBarry Smith 3346947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 3356947451fSStefano Zampini { 3366947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3376947451fSStefano Zampini PetscErrorCode ierr; 3386947451fSStefano Zampini 3396947451fSStefano Zampini PetscFunctionBegin; 3406947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 3416947451fSStefano Zampini PetscFunctionReturn(0); 3426947451fSStefano Zampini } 3436947451fSStefano Zampini 344dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 3458965ea79SLois Curfman McInnes { 34639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 347dfbe8321SBarry Smith PetscErrorCode ierr; 34813f74950SBarry Smith PetscInt nstash,reallocs; 3498965ea79SLois Curfman McInnes 3503a40ed3dSBarry Smith PetscFunctionBegin; 351910cf402Sprj- if (mdn->donotstash || mat->nooffprocentries) return(0); 3528965ea79SLois Curfman McInnes 353d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 3548798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 355ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 3563a40ed3dSBarry Smith PetscFunctionReturn(0); 3578965ea79SLois Curfman McInnes } 3588965ea79SLois Curfman McInnes 359dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 3608965ea79SLois Curfman McInnes { 36139ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 3626849ba73SBarry Smith PetscErrorCode ierr; 36313f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 36413f74950SBarry Smith PetscMPIInt n; 36587828ca2SBarry Smith PetscScalar *val; 3668965ea79SLois Curfman McInnes 3673a40ed3dSBarry Smith PetscFunctionBegin; 368910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 3698965ea79SLois Curfman McInnes /* wait on receives */ 3707ef1d9bdSSatish Balay while (1) { 3718798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 3727ef1d9bdSSatish Balay if (!flg) break; 3738965ea79SLois Curfman McInnes 3747ef1d9bdSSatish Balay for (i=0; i<n;) { 3757ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3762205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 3772205254eSKarl Rupp if (row[j] != rstart) break; 3782205254eSKarl Rupp } 3797ef1d9bdSSatish Balay if (j < n) ncols = j-i; 3807ef1d9bdSSatish Balay else ncols = n-i; 3817ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3824b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 3837ef1d9bdSSatish Balay i = j; 3848965ea79SLois Curfman McInnes } 3857ef1d9bdSSatish Balay } 3868798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 387910cf402Sprj- } 3888965ea79SLois Curfman McInnes 38939ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 39039ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 3918965ea79SLois Curfman McInnes 3926d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 39339ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 3948965ea79SLois Curfman McInnes } 3953a40ed3dSBarry Smith PetscFunctionReturn(0); 3968965ea79SLois Curfman McInnes } 3978965ea79SLois Curfman McInnes 398dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 3998965ea79SLois Curfman McInnes { 400dfbe8321SBarry Smith PetscErrorCode ierr; 40139ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 4023a40ed3dSBarry Smith 4033a40ed3dSBarry Smith PetscFunctionBegin; 4043a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 4053a40ed3dSBarry Smith PetscFunctionReturn(0); 4068965ea79SLois Curfman McInnes } 4078965ea79SLois Curfman McInnes 408637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 4098965ea79SLois Curfman McInnes { 41039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 4116849ba73SBarry Smith PetscErrorCode ierr; 412637a0070SStefano Zampini PetscInt i,len,*lrows; 413637a0070SStefano Zampini 414637a0070SStefano Zampini PetscFunctionBegin; 415637a0070SStefano Zampini /* get locally owned rows */ 416637a0070SStefano Zampini ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 417637a0070SStefano Zampini /* fix right hand side if needed */ 418637a0070SStefano Zampini if (x && b) { 41997b48c8fSBarry Smith const PetscScalar *xx; 42097b48c8fSBarry Smith PetscScalar *bb; 4218965ea79SLois Curfman McInnes 42297b48c8fSBarry Smith ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 423637a0070SStefano Zampini ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 424637a0070SStefano Zampini for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 42597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 426637a0070SStefano Zampini ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 42797b48c8fSBarry Smith } 428637a0070SStefano Zampini ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 429e2eb51b1SBarry Smith if (diag != 0.0) { 430637a0070SStefano Zampini Vec d; 431b9679d65SBarry Smith 432637a0070SStefano Zampini ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 433637a0070SStefano Zampini ierr = VecSet(d,diag);CHKERRQ(ierr); 434637a0070SStefano Zampini ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 435637a0070SStefano Zampini ierr = VecDestroy(&d);CHKERRQ(ierr); 436b9679d65SBarry Smith } 437606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4383a40ed3dSBarry Smith PetscFunctionReturn(0); 4398965ea79SLois Curfman McInnes } 4408965ea79SLois Curfman McInnes 441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 442cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 443cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 444cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 445cc2e6a90SBarry Smith 446dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4478965ea79SLois Curfman McInnes { 44839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449dfbe8321SBarry Smith PetscErrorCode ierr; 450637a0070SStefano Zampini const PetscScalar *ax; 451637a0070SStefano Zampini PetscScalar *ay; 452c456f294SBarry Smith 4533a40ed3dSBarry Smith PetscFunctionBegin; 454637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 455637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 456637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 457637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 458637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 459637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 460637a0070SStefano Zampini ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4613a40ed3dSBarry Smith PetscFunctionReturn(0); 4628965ea79SLois Curfman McInnes } 4638965ea79SLois Curfman McInnes 464dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4658965ea79SLois Curfman McInnes { 46639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467dfbe8321SBarry Smith PetscErrorCode ierr; 468637a0070SStefano Zampini const PetscScalar *ax; 469637a0070SStefano Zampini PetscScalar *ay; 470c456f294SBarry Smith 4713a40ed3dSBarry Smith PetscFunctionBegin; 472637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 473637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 474637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 475637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 476637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 477637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 478637a0070SStefano Zampini ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4793a40ed3dSBarry Smith PetscFunctionReturn(0); 4808965ea79SLois Curfman McInnes } 4818965ea79SLois Curfman McInnes 482dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 483096963f5SLois Curfman McInnes { 484096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 485dfbe8321SBarry Smith PetscErrorCode ierr; 486637a0070SStefano Zampini const PetscScalar *ax; 487637a0070SStefano Zampini PetscScalar *ay; 488096963f5SLois Curfman McInnes 4893a40ed3dSBarry Smith PetscFunctionBegin; 490637a0070SStefano Zampini ierr = VecSet(yy,0.0);CHKERRQ(ierr); 491637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 492637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 493637a0070SStefano Zampini ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 494637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 495637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 496637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 497637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 4983a40ed3dSBarry Smith PetscFunctionReturn(0); 499096963f5SLois Curfman McInnes } 500096963f5SLois Curfman McInnes 501dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 502096963f5SLois Curfman McInnes { 503096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504dfbe8321SBarry Smith PetscErrorCode ierr; 505637a0070SStefano Zampini const PetscScalar *ax; 506637a0070SStefano Zampini PetscScalar *ay; 507096963f5SLois Curfman McInnes 5083a40ed3dSBarry Smith PetscFunctionBegin; 5093501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 510637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 511637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 512637a0070SStefano Zampini ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 513637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 514637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 515637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 516637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 5173a40ed3dSBarry Smith PetscFunctionReturn(0); 518096963f5SLois Curfman McInnes } 519096963f5SLois Curfman McInnes 520dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 5218965ea79SLois Curfman McInnes { 52239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 523dfbe8321SBarry Smith PetscErrorCode ierr; 524637a0070SStefano Zampini PetscInt lda,len,i,n,m = A->rmap->n,radd; 52587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 526637a0070SStefano Zampini const PetscScalar *av; 527ed3cc1f0SBarry Smith 5283a40ed3dSBarry Smith PetscFunctionBegin; 5292dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 5301ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 531096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 532e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 533d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 534d0f46423SBarry Smith radd = A->rmap->rstart*m; 535637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 536637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 53744cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 538637a0070SStefano Zampini x[i] = av[radd + i*lda + i]; 539096963f5SLois Curfman McInnes } 540637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5423a40ed3dSBarry Smith PetscFunctionReturn(0); 5438965ea79SLois Curfman McInnes } 5448965ea79SLois Curfman McInnes 545dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5468965ea79SLois Curfman McInnes { 5473501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 548dfbe8321SBarry Smith PetscErrorCode ierr; 549ed3cc1f0SBarry Smith 5503a40ed3dSBarry Smith PetscFunctionBegin; 551aa482453SBarry Smith #if defined(PETSC_USE_LOG) 552d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5538965ea79SLois Curfman McInnes #endif 5548798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555*616b8fbbSStefano Zampini if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 556*616b8fbbSStefano Zampini if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 5576bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5586bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 559637a0070SStefano Zampini ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 5606947451fSStefano Zampini ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 5615ea7661aSPierre Jolivet ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 56201b82886SBarry Smith 563bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 564dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5658baccfbdSHong Zhang 56649a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 567ad16ce7aSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 5688baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5698572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5708572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 5718572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 5726947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 5736947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 574d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 575d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 576d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 5778baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5788baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5798baccfbdSHong Zhang #endif 580bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 5814222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5824222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 5836718818eSStefano Zampini #if defined (PETSC_HAVE_CUDA) 5846718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 5856718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 5866718818eSStefano Zampini #endif 58786aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 58886aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 5896947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 5906947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 5916947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 5926947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 5936947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 5946947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 5955ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 5965ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 5973a40ed3dSBarry Smith PetscFunctionReturn(0); 5988965ea79SLois Curfman McInnes } 59939ddd567SLois Curfman McInnes 60052c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 60152c5f739Sprj- 6029804daf3SBarry Smith #include <petscdraw.h> 6036849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 6048965ea79SLois Curfman McInnes { 60539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 606dfbe8321SBarry Smith PetscErrorCode ierr; 607*616b8fbbSStefano Zampini PetscMPIInt rank; 60819fd82e9SBarry Smith PetscViewerType vtype; 609ace3abfcSBarry Smith PetscBool iascii,isdraw; 610b0a32e0cSBarry Smith PetscViewer sviewer; 611f3ef73ceSBarry Smith PetscViewerFormat format; 6128965ea79SLois Curfman McInnes 6133a40ed3dSBarry Smith PetscFunctionBegin; 614*616b8fbbSStefano Zampini ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 615251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 616251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 61732077d6dSBarry Smith if (iascii) { 618b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 619b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6214e220ebcSLois Curfman McInnes MatInfo info; 622888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 6231575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 6247b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 625b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6261575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 627637a0070SStefano Zampini ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 6283a40ed3dSBarry Smith PetscFunctionReturn(0); 629fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 6303a40ed3dSBarry Smith PetscFunctionReturn(0); 6318965ea79SLois Curfman McInnes } 632f1af5d2fSBarry Smith } else if (isdraw) { 633b0a32e0cSBarry Smith PetscDraw draw; 634ace3abfcSBarry Smith PetscBool isnull; 635f1af5d2fSBarry Smith 636b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 637b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 638f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 639f1af5d2fSBarry Smith } 64077ed5343SBarry Smith 6417da1fb6eSBarry Smith { 6428965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6438965ea79SLois Curfman McInnes Mat A; 644d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 645ba8c8a56SBarry Smith PetscInt *cols; 646ba8c8a56SBarry Smith PetscScalar *vals; 6478965ea79SLois Curfman McInnes 648ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 6498965ea79SLois Curfman McInnes if (!rank) { 650f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 6513a40ed3dSBarry Smith } else { 652f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 6538965ea79SLois Curfman McInnes } 6547adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 655878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 6560298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 6573bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 6588965ea79SLois Curfman McInnes 65939ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 66039ddd567SLois Curfman McInnes but it's quick for now */ 66151022da4SBarry Smith A->insertmode = INSERT_VALUES; 6622205254eSKarl Rupp 6632205254eSKarl Rupp row = mat->rmap->rstart; 6642205254eSKarl Rupp m = mdn->A->rmap->n; 6658965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 666ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 667ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 668ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 66939ddd567SLois Curfman McInnes row++; 6708965ea79SLois Curfman McInnes } 6718965ea79SLois Curfman McInnes 6726d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6736d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6743f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 675b9b97703SBarry Smith if (!rank) { 6761a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 6777da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 6788965ea79SLois Curfman McInnes } 6793f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 680b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6816bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 6828965ea79SLois Curfman McInnes } 6833a40ed3dSBarry Smith PetscFunctionReturn(0); 6848965ea79SLois Curfman McInnes } 6858965ea79SLois Curfman McInnes 686dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 6878965ea79SLois Curfman McInnes { 688dfbe8321SBarry Smith PetscErrorCode ierr; 689ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 6908965ea79SLois Curfman McInnes 691433994e6SBarry Smith PetscFunctionBegin; 692251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 693251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 694251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 695251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 6960f5bd95cSBarry Smith 69732077d6dSBarry Smith if (iascii || issocket || isdraw) { 698f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 6990f5bd95cSBarry Smith } else if (isbinary) { 7008491ab44SLisandro Dalcin ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 70111aeaf0aSBarry Smith } 7023a40ed3dSBarry Smith PetscFunctionReturn(0); 7038965ea79SLois Curfman McInnes } 7048965ea79SLois Curfman McInnes 705dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 7068965ea79SLois Curfman McInnes { 7073501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 7083501a2bdSLois Curfman McInnes Mat mdn = mat->A; 709dfbe8321SBarry Smith PetscErrorCode ierr; 7103966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 7118965ea79SLois Curfman McInnes 7123a40ed3dSBarry Smith PetscFunctionBegin; 7134e220ebcSLois Curfman McInnes info->block_size = 1.0; 7142205254eSKarl Rupp 7154e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 7162205254eSKarl Rupp 7174e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 7184e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 7198965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 7204e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7214e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7224e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7234e220ebcSLois Curfman McInnes info->memory = isend[3]; 7244e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7258965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 7263966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7272205254eSKarl Rupp 7284e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7294e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7304e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7314e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7324e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7338965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 7343966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7352205254eSKarl Rupp 7364e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7374e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7384e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7394e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7404e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7418965ea79SLois Curfman McInnes } 7424e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7434e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7444e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7453a40ed3dSBarry Smith PetscFunctionReturn(0); 7468965ea79SLois Curfman McInnes } 7478965ea79SLois Curfman McInnes 748ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 7498965ea79SLois Curfman McInnes { 75039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 751dfbe8321SBarry Smith PetscErrorCode ierr; 7528965ea79SLois Curfman McInnes 7533a40ed3dSBarry Smith PetscFunctionBegin; 75412c028f9SKris Buschelman switch (op) { 755512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 75612c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 75712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 75843674050SBarry Smith MatCheckPreallocated(A,1); 7594e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 76012c028f9SKris Buschelman break; 76112c028f9SKris Buschelman case MAT_ROW_ORIENTED: 76243674050SBarry Smith MatCheckPreallocated(A,1); 7634e0d8c25SBarry Smith a->roworiented = flg; 7644e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 76512c028f9SKris Buschelman break; 7664e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 76713fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 76812c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 769071fcb05SBarry Smith case MAT_SORTED_FULL: 770290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 77112c028f9SKris Buschelman break; 77212c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 7734e0d8c25SBarry Smith a->donotstash = flg; 77412c028f9SKris Buschelman break; 77577e54ba9SKris Buschelman case MAT_SYMMETRIC: 77677e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7779a4540c5SBarry Smith case MAT_HERMITIAN: 7789a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 779600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 7805d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 781290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 78277e54ba9SKris Buschelman break; 78312c028f9SKris Buschelman default: 784e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 7853a40ed3dSBarry Smith } 7863a40ed3dSBarry Smith PetscFunctionReturn(0); 7878965ea79SLois Curfman McInnes } 7888965ea79SLois Curfman McInnes 789dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7905b2fa520SLois Curfman McInnes { 7915b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 792637a0070SStefano Zampini const PetscScalar *l; 793637a0070SStefano Zampini PetscScalar x,*v,*vv,*r; 794dfbe8321SBarry Smith PetscErrorCode ierr; 795637a0070SStefano Zampini PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 7965b2fa520SLois Curfman McInnes 7975b2fa520SLois Curfman McInnes PetscFunctionBegin; 798637a0070SStefano Zampini ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 799637a0070SStefano Zampini ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 80072d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 8015b2fa520SLois Curfman McInnes if (ll) { 80272d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 803637a0070SStefano Zampini if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 804bca11509SBarry Smith ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 8055b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 8065b2fa520SLois Curfman McInnes x = l[i]; 807637a0070SStefano Zampini v = vv + i; 808637a0070SStefano Zampini for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 8095b2fa520SLois Curfman McInnes } 810bca11509SBarry Smith ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 811637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 8125b2fa520SLois Curfman McInnes } 8135b2fa520SLois Curfman McInnes if (rr) { 814637a0070SStefano Zampini const PetscScalar *ar; 815637a0070SStefano Zampini 816175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 817e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 818637a0070SStefano Zampini ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 819637a0070SStefano Zampini ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 820637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 821637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 822637a0070SStefano Zampini ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 8235b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 8245b2fa520SLois Curfman McInnes x = r[i]; 825637a0070SStefano Zampini v = vv + i*lda; 8262205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 8275b2fa520SLois Curfman McInnes } 828637a0070SStefano Zampini ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 829637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 8305b2fa520SLois Curfman McInnes } 831637a0070SStefano Zampini ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 8325b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8335b2fa520SLois Curfman McInnes } 8345b2fa520SLois Curfman McInnes 835dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 836096963f5SLois Curfman McInnes { 8373501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 838dfbe8321SBarry Smith PetscErrorCode ierr; 83913f74950SBarry Smith PetscInt i,j; 840*616b8fbbSStefano Zampini PetscMPIInt size; 841329f5518SBarry Smith PetscReal sum = 0.0; 842637a0070SStefano Zampini const PetscScalar *av,*v; 8433501a2bdSLois Curfman McInnes 8443a40ed3dSBarry Smith PetscFunctionBegin; 845637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 846637a0070SStefano Zampini v = av; 847*616b8fbbSStefano Zampini ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 848*616b8fbbSStefano Zampini if (size == 1) { 849064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 8503501a2bdSLois Curfman McInnes } else { 8513501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 852d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 853329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 8543501a2bdSLois Curfman McInnes } 855b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8568f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 857dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 8583a40ed3dSBarry Smith } else if (type == NORM_1) { 859329f5518SBarry Smith PetscReal *tmp,*tmp2; 860580bdb30SBarry Smith ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 861064f8208SBarry Smith *nrm = 0.0; 862637a0070SStefano Zampini v = av; 863d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 864d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 86567e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8663501a2bdSLois Curfman McInnes } 8673501a2bdSLois Curfman McInnes } 868b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 869d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 870064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8713501a2bdSLois Curfman McInnes } 8728627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 873d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 8743a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 875329f5518SBarry Smith PetscReal ntemp; 8763501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 877b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 878ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 8793501a2bdSLois Curfman McInnes } 880637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 8813a40ed3dSBarry Smith PetscFunctionReturn(0); 8823501a2bdSLois Curfman McInnes } 8833501a2bdSLois Curfman McInnes 884fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 8853501a2bdSLois Curfman McInnes { 8863501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 8873501a2bdSLois Curfman McInnes Mat B; 888d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 8896849ba73SBarry Smith PetscErrorCode ierr; 890637a0070SStefano Zampini PetscInt j,i,lda; 89187828ca2SBarry Smith PetscScalar *v; 8923501a2bdSLois Curfman McInnes 8933a40ed3dSBarry Smith PetscFunctionBegin; 894cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 895ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 896d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 8977adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 8980298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 899637a0070SStefano Zampini } else B = *matout; 9003501a2bdSLois Curfman McInnes 901637a0070SStefano Zampini m = a->A->rmap->n; n = a->A->cmap->n; 902637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 903637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 904785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 9053501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 9061acff37aSSatish Balay for (j=0; j<n; j++) { 9073501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 908637a0070SStefano Zampini v += lda; 9093501a2bdSLois Curfman McInnes } 910637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 911606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 9126d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9136d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 914cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 9153501a2bdSLois Curfman McInnes *matout = B; 9163501a2bdSLois Curfman McInnes } else { 91728be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 9183501a2bdSLois Curfman McInnes } 9193a40ed3dSBarry Smith PetscFunctionReturn(0); 920096963f5SLois Curfman McInnes } 921096963f5SLois Curfman McInnes 9226849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 92352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 9248965ea79SLois Curfman McInnes 9254994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 926273d9f13SBarry Smith { 927dfbe8321SBarry Smith PetscErrorCode ierr; 928273d9f13SBarry Smith 929273d9f13SBarry Smith PetscFunctionBegin; 93018992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 93118992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 93218992e5dSStefano Zampini if (!A->preallocated) { 933273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 93418992e5dSStefano Zampini } 935273d9f13SBarry Smith PetscFunctionReturn(0); 936273d9f13SBarry Smith } 937273d9f13SBarry Smith 938488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 939488007eeSBarry Smith { 940488007eeSBarry Smith PetscErrorCode ierr; 941488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 942488007eeSBarry Smith 943488007eeSBarry Smith PetscFunctionBegin; 944488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 945488007eeSBarry Smith PetscFunctionReturn(0); 946488007eeSBarry Smith } 947488007eeSBarry Smith 9487087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 949ba337c44SJed Brown { 950ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 951ba337c44SJed Brown PetscErrorCode ierr; 952ba337c44SJed Brown 953ba337c44SJed Brown PetscFunctionBegin; 954ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 955ba337c44SJed Brown PetscFunctionReturn(0); 956ba337c44SJed Brown } 957ba337c44SJed Brown 958ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 959ba337c44SJed Brown { 960ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 961ba337c44SJed Brown PetscErrorCode ierr; 962ba337c44SJed Brown 963ba337c44SJed Brown PetscFunctionBegin; 964ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 965ba337c44SJed Brown PetscFunctionReturn(0); 966ba337c44SJed Brown } 967ba337c44SJed Brown 968ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 969ba337c44SJed Brown { 970ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 971ba337c44SJed Brown PetscErrorCode ierr; 972ba337c44SJed Brown 973ba337c44SJed Brown PetscFunctionBegin; 974ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 975ba337c44SJed Brown PetscFunctionReturn(0); 976ba337c44SJed Brown } 977ba337c44SJed Brown 97849a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 97949a6ff4bSBarry Smith { 98049a6ff4bSBarry Smith PetscErrorCode ierr; 981637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*) A->data; 98249a6ff4bSBarry Smith 98349a6ff4bSBarry Smith PetscFunctionBegin; 984637a0070SStefano Zampini if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 985637a0070SStefano Zampini if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 986637a0070SStefano Zampini ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 98749a6ff4bSBarry Smith PetscFunctionReturn(0); 98849a6ff4bSBarry Smith } 98949a6ff4bSBarry Smith 99052c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 99152c5f739Sprj- 9920716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 9930716a85fSBarry Smith { 9940716a85fSBarry Smith PetscErrorCode ierr; 9950716a85fSBarry Smith PetscInt i,n; 9960716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 9970716a85fSBarry Smith PetscReal *work; 9980716a85fSBarry Smith 9990716a85fSBarry Smith PetscFunctionBegin; 10000298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1001785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 10020716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 10030716a85fSBarry Smith if (type == NORM_2) { 10040716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 10050716a85fSBarry Smith } 10060716a85fSBarry Smith if (type == NORM_INFINITY) { 1007b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 10080716a85fSBarry Smith } else { 1009b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 10100716a85fSBarry Smith } 10110716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 10120716a85fSBarry Smith if (type == NORM_2) { 10138f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 10140716a85fSBarry Smith } 10150716a85fSBarry Smith PetscFunctionReturn(0); 10160716a85fSBarry Smith } 10170716a85fSBarry Smith 1018637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 10196947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10206947451fSStefano Zampini { 10216947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10226947451fSStefano Zampini PetscErrorCode ierr; 10236947451fSStefano Zampini PetscInt lda; 10246947451fSStefano Zampini 10256947451fSStefano Zampini PetscFunctionBegin; 1026*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1027*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 10286947451fSStefano Zampini if (!a->cvec) { 10296947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1030*616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 10316947451fSStefano Zampini } 10326947451fSStefano Zampini a->vecinuse = col + 1; 10336947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10346947451fSStefano Zampini ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10356947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10366947451fSStefano Zampini *v = a->cvec; 10376947451fSStefano Zampini PetscFunctionReturn(0); 10386947451fSStefano Zampini } 10396947451fSStefano Zampini 10406947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10416947451fSStefano Zampini { 10426947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10436947451fSStefano Zampini PetscErrorCode ierr; 10446947451fSStefano Zampini 10456947451fSStefano Zampini PetscFunctionBegin; 10465ea7661aSPierre Jolivet if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 10476947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 10486947451fSStefano Zampini a->vecinuse = 0; 10496947451fSStefano Zampini ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10506947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10516947451fSStefano Zampini *v = NULL; 10526947451fSStefano Zampini PetscFunctionReturn(0); 10536947451fSStefano Zampini } 10546947451fSStefano Zampini 10556947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10566947451fSStefano Zampini { 10576947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10586947451fSStefano Zampini PetscInt lda; 10596947451fSStefano Zampini PetscErrorCode ierr; 10606947451fSStefano Zampini 10616947451fSStefano Zampini PetscFunctionBegin; 1062*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1063*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 10646947451fSStefano Zampini if (!a->cvec) { 10656947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1066*616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 10676947451fSStefano Zampini } 10686947451fSStefano Zampini a->vecinuse = col + 1; 10696947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10706947451fSStefano Zampini ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10716947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10726947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 10736947451fSStefano Zampini *v = a->cvec; 10746947451fSStefano Zampini PetscFunctionReturn(0); 10756947451fSStefano Zampini } 10766947451fSStefano Zampini 10776947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10786947451fSStefano Zampini { 10796947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10806947451fSStefano Zampini PetscErrorCode ierr; 10816947451fSStefano Zampini 10826947451fSStefano Zampini PetscFunctionBegin; 1083*616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1084*616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 10856947451fSStefano Zampini a->vecinuse = 0; 10866947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10876947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 10886947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10896947451fSStefano Zampini *v = NULL; 10906947451fSStefano Zampini PetscFunctionReturn(0); 10916947451fSStefano Zampini } 10926947451fSStefano Zampini 10936947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10946947451fSStefano Zampini { 10956947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10966947451fSStefano Zampini PetscErrorCode ierr; 10976947451fSStefano Zampini PetscInt lda; 10986947451fSStefano Zampini 10996947451fSStefano Zampini PetscFunctionBegin; 1100*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1101*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 11026947451fSStefano Zampini if (!a->cvec) { 11036947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1104*616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 11056947451fSStefano Zampini } 11066947451fSStefano Zampini a->vecinuse = col + 1; 11076947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 11086947451fSStefano Zampini ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 11096947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 11106947451fSStefano Zampini *v = a->cvec; 11116947451fSStefano Zampini PetscFunctionReturn(0); 11126947451fSStefano Zampini } 11136947451fSStefano Zampini 11146947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 11156947451fSStefano Zampini { 11166947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 11176947451fSStefano Zampini PetscErrorCode ierr; 11186947451fSStefano Zampini 11196947451fSStefano Zampini PetscFunctionBegin; 1120*616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1121*616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 11226947451fSStefano Zampini a->vecinuse = 0; 11236947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 11246947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 11256947451fSStefano Zampini *v = NULL; 11266947451fSStefano Zampini PetscFunctionReturn(0); 11276947451fSStefano Zampini } 11286947451fSStefano Zampini 1129637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1130637a0070SStefano Zampini { 1131637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1132637a0070SStefano Zampini PetscErrorCode ierr; 1133637a0070SStefano Zampini 1134637a0070SStefano Zampini PetscFunctionBegin; 1135*616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1136*616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1137637a0070SStefano Zampini ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1138637a0070SStefano Zampini PetscFunctionReturn(0); 1139637a0070SStefano Zampini } 1140637a0070SStefano Zampini 1141637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1142637a0070SStefano Zampini { 1143637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1144637a0070SStefano Zampini PetscErrorCode ierr; 1145637a0070SStefano Zampini 1146637a0070SStefano Zampini PetscFunctionBegin; 1147*616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1148*616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1149637a0070SStefano Zampini ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1150637a0070SStefano Zampini PetscFunctionReturn(0); 1151637a0070SStefano Zampini } 1152637a0070SStefano Zampini 1153d5ea218eSStefano Zampini static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1154d5ea218eSStefano Zampini { 1155d5ea218eSStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1156d5ea218eSStefano Zampini PetscErrorCode ierr; 1157d5ea218eSStefano Zampini 1158d5ea218eSStefano Zampini PetscFunctionBegin; 1159*616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1160*616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1161d5ea218eSStefano Zampini ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1162d5ea218eSStefano Zampini PetscFunctionReturn(0); 1163d5ea218eSStefano Zampini } 1164d5ea218eSStefano Zampini 1165637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1166637a0070SStefano Zampini { 1167637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1168637a0070SStefano Zampini PetscErrorCode ierr; 1169637a0070SStefano Zampini 1170637a0070SStefano Zampini PetscFunctionBegin; 1171637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1172637a0070SStefano Zampini PetscFunctionReturn(0); 1173637a0070SStefano Zampini } 1174637a0070SStefano Zampini 1175637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1176637a0070SStefano Zampini { 1177637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1178637a0070SStefano Zampini PetscErrorCode ierr; 1179637a0070SStefano Zampini 1180637a0070SStefano Zampini PetscFunctionBegin; 1181637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1182637a0070SStefano Zampini PetscFunctionReturn(0); 1183637a0070SStefano Zampini } 1184637a0070SStefano Zampini 1185637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1186637a0070SStefano Zampini { 1187637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1188637a0070SStefano Zampini PetscErrorCode ierr; 1189637a0070SStefano Zampini 1190637a0070SStefano Zampini PetscFunctionBegin; 1191637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1192637a0070SStefano Zampini PetscFunctionReturn(0); 1193637a0070SStefano Zampini } 1194637a0070SStefano Zampini 1195637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1196637a0070SStefano Zampini { 1197637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1198637a0070SStefano Zampini PetscErrorCode ierr; 1199637a0070SStefano Zampini 1200637a0070SStefano Zampini PetscFunctionBegin; 1201637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1202637a0070SStefano Zampini PetscFunctionReturn(0); 1203637a0070SStefano Zampini } 1204637a0070SStefano Zampini 1205637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1206637a0070SStefano Zampini { 1207637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1208637a0070SStefano Zampini PetscErrorCode ierr; 1209637a0070SStefano Zampini 1210637a0070SStefano Zampini PetscFunctionBegin; 1211637a0070SStefano Zampini ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1212637a0070SStefano Zampini PetscFunctionReturn(0); 1213637a0070SStefano Zampini } 1214637a0070SStefano Zampini 1215637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1216637a0070SStefano Zampini { 1217637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1218637a0070SStefano Zampini PetscErrorCode ierr; 1219637a0070SStefano Zampini 1220637a0070SStefano Zampini PetscFunctionBegin; 1221637a0070SStefano Zampini ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1222637a0070SStefano Zampini PetscFunctionReturn(0); 1223637a0070SStefano Zampini } 1224637a0070SStefano Zampini 12256947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 12266947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 12276947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 12286947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 12296947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 12306947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 12315ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 12326947451fSStefano Zampini 1233637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1234637a0070SStefano Zampini { 1235637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1236637a0070SStefano Zampini PetscErrorCode ierr; 1237637a0070SStefano Zampini 1238637a0070SStefano Zampini PetscFunctionBegin; 1239*616b8fbbSStefano Zampini if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1240*616b8fbbSStefano Zampini if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1241637a0070SStefano Zampini if (d->A) { 1242637a0070SStefano Zampini ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1243637a0070SStefano Zampini } 1244637a0070SStefano Zampini mat->boundtocpu = bind; 12456947451fSStefano Zampini if (!bind) { 12466947451fSStefano Zampini PetscBool iscuda; 12476947451fSStefano Zampini 12486947451fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 12496947451fSStefano Zampini if (!iscuda) { 12506947451fSStefano Zampini ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1251*616b8fbbSStefano Zampini } 1252*616b8fbbSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1253*616b8fbbSStefano Zampini if (!iscuda) { 12545ea7661aSPierre Jolivet ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 12556947451fSStefano Zampini } 12566947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12576947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12586947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12596947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12606947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12616947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12626947451fSStefano Zampini } else { 12636947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 12646947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 12656947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 12666947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 12676947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 12686947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1269*616b8fbbSStefano Zampini } 1270*616b8fbbSStefano Zampini if (d->cmat) { 1271*616b8fbbSStefano Zampini ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 12726947451fSStefano Zampini } 1273637a0070SStefano Zampini PetscFunctionReturn(0); 1274637a0070SStefano Zampini } 1275637a0070SStefano Zampini 1276637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1277637a0070SStefano Zampini { 1278637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1279637a0070SStefano Zampini PetscErrorCode ierr; 1280637a0070SStefano Zampini PetscBool iscuda; 1281637a0070SStefano Zampini 1282637a0070SStefano Zampini PetscFunctionBegin; 1283d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1284637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1285637a0070SStefano Zampini if (!iscuda) PetscFunctionReturn(0); 1286637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1287637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1288637a0070SStefano Zampini if (!d->A) { 1289637a0070SStefano Zampini ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1290637a0070SStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1291637a0070SStefano Zampini ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1292637a0070SStefano Zampini } 1293637a0070SStefano Zampini ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1294637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1295637a0070SStefano Zampini A->preallocated = PETSC_TRUE; 1296637a0070SStefano Zampini PetscFunctionReturn(0); 1297637a0070SStefano Zampini } 1298637a0070SStefano Zampini #endif 1299637a0070SStefano Zampini 130073a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 130173a71a0fSBarry Smith { 130273a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 130373a71a0fSBarry Smith PetscErrorCode ierr; 130473a71a0fSBarry Smith 130573a71a0fSBarry Smith PetscFunctionBegin; 1306637a0070SStefano Zampini ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 130773a71a0fSBarry Smith PetscFunctionReturn(0); 130873a71a0fSBarry Smith } 130973a71a0fSBarry Smith 13103b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 13113b49f96aSBarry Smith { 13123b49f96aSBarry Smith PetscFunctionBegin; 13133b49f96aSBarry Smith *missing = PETSC_FALSE; 13143b49f96aSBarry Smith PetscFunctionReturn(0); 13153b49f96aSBarry Smith } 13163b49f96aSBarry Smith 13174222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1318cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 13196718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 13206718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 13216718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 13226718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1323cc48ffa7SToby Isaac 13248965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 132509dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 132609dc0095SBarry Smith MatGetRow_MPIDense, 132709dc0095SBarry Smith MatRestoreRow_MPIDense, 132809dc0095SBarry Smith MatMult_MPIDense, 132997304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 13307c922b88SBarry Smith MatMultTranspose_MPIDense, 13317c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 13328965ea79SLois Curfman McInnes 0, 133309dc0095SBarry Smith 0, 133409dc0095SBarry Smith 0, 133597304618SKris Buschelman /* 10*/ 0, 133609dc0095SBarry Smith 0, 133709dc0095SBarry Smith 0, 133809dc0095SBarry Smith 0, 133909dc0095SBarry Smith MatTranspose_MPIDense, 134097304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 13416e4ee0c6SHong Zhang MatEqual_MPIDense, 134209dc0095SBarry Smith MatGetDiagonal_MPIDense, 13435b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 134409dc0095SBarry Smith MatNorm_MPIDense, 134597304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 134609dc0095SBarry Smith MatAssemblyEnd_MPIDense, 134709dc0095SBarry Smith MatSetOption_MPIDense, 134809dc0095SBarry Smith MatZeroEntries_MPIDense, 1349d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1350919b68f7SBarry Smith 0, 135101b82886SBarry Smith 0, 135201b82886SBarry Smith 0, 135301b82886SBarry Smith 0, 13544994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1355273d9f13SBarry Smith 0, 135609dc0095SBarry Smith 0, 1357c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 13588c778c55SBarry Smith 0, 1359d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 136009dc0095SBarry Smith 0, 136109dc0095SBarry Smith 0, 136209dc0095SBarry Smith 0, 136309dc0095SBarry Smith 0, 1364d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 13657dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 136609dc0095SBarry Smith 0, 136709dc0095SBarry Smith MatGetValues_MPIDense, 136809dc0095SBarry Smith 0, 1369d519adbfSMatthew Knepley /* 44*/ 0, 137009dc0095SBarry Smith MatScale_MPIDense, 13717d68702bSBarry Smith MatShift_Basic, 137209dc0095SBarry Smith 0, 137309dc0095SBarry Smith 0, 137473a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 137509dc0095SBarry Smith 0, 137609dc0095SBarry Smith 0, 137709dc0095SBarry Smith 0, 137809dc0095SBarry Smith 0, 1379d519adbfSMatthew Knepley /* 54*/ 0, 138009dc0095SBarry Smith 0, 138109dc0095SBarry Smith 0, 138209dc0095SBarry Smith 0, 138309dc0095SBarry Smith 0, 13847dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1385b9b97703SBarry Smith MatDestroy_MPIDense, 1386b9b97703SBarry Smith MatView_MPIDense, 1387357abbc8SBarry Smith 0, 138897304618SKris Buschelman 0, 1389d519adbfSMatthew Knepley /* 64*/ 0, 139097304618SKris Buschelman 0, 139197304618SKris Buschelman 0, 139297304618SKris Buschelman 0, 139397304618SKris Buschelman 0, 1394d519adbfSMatthew Knepley /* 69*/ 0, 139597304618SKris Buschelman 0, 139697304618SKris Buschelman 0, 139797304618SKris Buschelman 0, 139897304618SKris Buschelman 0, 1399d519adbfSMatthew Knepley /* 74*/ 0, 140097304618SKris Buschelman 0, 140197304618SKris Buschelman 0, 140297304618SKris Buschelman 0, 140397304618SKris Buschelman 0, 1404d519adbfSMatthew Knepley /* 79*/ 0, 140597304618SKris Buschelman 0, 140697304618SKris Buschelman 0, 140797304618SKris Buschelman 0, 14085bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1409865e5f61SKris Buschelman 0, 1410865e5f61SKris Buschelman 0, 1411865e5f61SKris Buschelman 0, 1412865e5f61SKris Buschelman 0, 1413865e5f61SKris Buschelman 0, 14144222ddf1SHong Zhang /* 89*/ 0, 14154222ddf1SHong Zhang 0, 14166718818eSStefano Zampini 0, 14172fbe02b9SBarry Smith 0, 1418ba337c44SJed Brown 0, 1419d519adbfSMatthew Knepley /* 94*/ 0, 14204222ddf1SHong Zhang 0, 14216718818eSStefano Zampini MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1422cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1423ba337c44SJed Brown 0, 14244222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1425ba337c44SJed Brown 0, 1426ba337c44SJed Brown 0, 1427ba337c44SJed Brown MatConjugate_MPIDense, 1428ba337c44SJed Brown 0, 1429ba337c44SJed Brown /*104*/ 0, 1430ba337c44SJed Brown MatRealPart_MPIDense, 1431ba337c44SJed Brown MatImaginaryPart_MPIDense, 143286d161a7SShri Abhyankar 0, 143386d161a7SShri Abhyankar 0, 143486d161a7SShri Abhyankar /*109*/ 0, 143586d161a7SShri Abhyankar 0, 143686d161a7SShri Abhyankar 0, 143749a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 14383b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 143986d161a7SShri Abhyankar /*114*/ 0, 144086d161a7SShri Abhyankar 0, 144186d161a7SShri Abhyankar 0, 144286d161a7SShri Abhyankar 0, 144386d161a7SShri Abhyankar 0, 144486d161a7SShri Abhyankar /*119*/ 0, 144586d161a7SShri Abhyankar 0, 144686d161a7SShri Abhyankar 0, 14470716a85fSBarry Smith 0, 14480716a85fSBarry Smith 0, 14490716a85fSBarry Smith /*124*/ 0, 14503964eb88SJed Brown MatGetColumnNorms_MPIDense, 14513964eb88SJed Brown 0, 14523964eb88SJed Brown 0, 14533964eb88SJed Brown 0, 14543964eb88SJed Brown /*129*/ 0, 14554222ddf1SHong Zhang 0, 14566718818eSStefano Zampini MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1457cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 14583964eb88SJed Brown 0, 14593964eb88SJed Brown /*134*/ 0, 14603964eb88SJed Brown 0, 14613964eb88SJed Brown 0, 14623964eb88SJed Brown 0, 14633964eb88SJed Brown 0, 14643964eb88SJed Brown /*139*/ 0, 1465f9426fe0SMark Adams 0, 146694e2cb23SJakub Kruzik 0, 146794e2cb23SJakub Kruzik 0, 146894e2cb23SJakub Kruzik 0, 14694222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 14704222ddf1SHong Zhang /*145*/ 0, 14714222ddf1SHong Zhang 0, 14724222ddf1SHong Zhang 0 1473ba337c44SJed Brown }; 14748965ea79SLois Curfman McInnes 14757087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1476a23d5eceSKris Buschelman { 1477637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1478637a0070SStefano Zampini PetscBool iscuda; 1479dfbe8321SBarry Smith PetscErrorCode ierr; 1480a23d5eceSKris Buschelman 1481a23d5eceSKris Buschelman PetscFunctionBegin; 1482*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 148334ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 148434ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1485637a0070SStefano Zampini if (!a->A) { 1486f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 14873bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1488637a0070SStefano Zampini ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1489637a0070SStefano Zampini } 1490637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1491637a0070SStefano Zampini ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1492637a0070SStefano Zampini ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1493637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 1494a23d5eceSKris Buschelman PetscFunctionReturn(0); 1495a23d5eceSKris Buschelman } 1496a23d5eceSKris Buschelman 149765b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1498cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 14998baccfbdSHong Zhang { 15008ea901baSHong Zhang Mat mat_elemental; 15018ea901baSHong Zhang PetscErrorCode ierr; 150232d7a744SHong Zhang PetscScalar *v; 150332d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 15048ea901baSHong Zhang 15058baccfbdSHong Zhang PetscFunctionBegin; 1506378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1507378336b6SHong Zhang mat_elemental = *newmat; 1508378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1509378336b6SHong Zhang } else { 1510378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1511378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1512378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1513378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 151432d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1515378336b6SHong Zhang } 1516378336b6SHong Zhang 151732d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 151832d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 151932d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 15208ea901baSHong Zhang 1521637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 152232d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 152332d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 15248ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15258ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 152632d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 152732d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 15288ea901baSHong Zhang 1529511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 153028be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 15318ea901baSHong Zhang } else { 15328ea901baSHong Zhang *newmat = mat_elemental; 15338ea901baSHong Zhang } 15348baccfbdSHong Zhang PetscFunctionReturn(0); 15358baccfbdSHong Zhang } 153665b80a83SHong Zhang #endif 15378baccfbdSHong Zhang 1538af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 153986aefd0dSHong Zhang { 154086aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 154186aefd0dSHong Zhang PetscErrorCode ierr; 154286aefd0dSHong Zhang 154386aefd0dSHong Zhang PetscFunctionBegin; 154486aefd0dSHong Zhang ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 154586aefd0dSHong Zhang PetscFunctionReturn(0); 154686aefd0dSHong Zhang } 154786aefd0dSHong Zhang 1548af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 154986aefd0dSHong Zhang { 155086aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 155186aefd0dSHong Zhang PetscErrorCode ierr; 155286aefd0dSHong Zhang 155386aefd0dSHong Zhang PetscFunctionBegin; 155486aefd0dSHong Zhang ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 155586aefd0dSHong Zhang PetscFunctionReturn(0); 155686aefd0dSHong Zhang } 155786aefd0dSHong Zhang 155894e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 155994e2cb23SJakub Kruzik { 156094e2cb23SJakub Kruzik PetscErrorCode ierr; 156194e2cb23SJakub Kruzik Mat_MPIDense *mat; 156294e2cb23SJakub Kruzik PetscInt m,nloc,N; 156394e2cb23SJakub Kruzik 156494e2cb23SJakub Kruzik PetscFunctionBegin; 156594e2cb23SJakub Kruzik ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 156694e2cb23SJakub Kruzik ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 156794e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 156894e2cb23SJakub Kruzik PetscInt sum; 156994e2cb23SJakub Kruzik 157094e2cb23SJakub Kruzik if (n == PETSC_DECIDE) { 157194e2cb23SJakub Kruzik ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 157294e2cb23SJakub Kruzik } 157394e2cb23SJakub Kruzik /* Check sum(n) = N */ 157494e2cb23SJakub Kruzik ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 157594e2cb23SJakub Kruzik if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 157694e2cb23SJakub Kruzik 157794e2cb23SJakub Kruzik ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 157894e2cb23SJakub Kruzik } 157994e2cb23SJakub Kruzik 158094e2cb23SJakub Kruzik /* numeric phase */ 158194e2cb23SJakub Kruzik mat = (Mat_MPIDense*)(*outmat)->data; 158294e2cb23SJakub Kruzik ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 158394e2cb23SJakub Kruzik ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 158494e2cb23SJakub Kruzik ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 158594e2cb23SJakub Kruzik PetscFunctionReturn(0); 158694e2cb23SJakub Kruzik } 158794e2cb23SJakub Kruzik 1588637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1589637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1590637a0070SStefano Zampini { 1591637a0070SStefano Zampini Mat B; 1592637a0070SStefano Zampini Mat_MPIDense *m; 1593637a0070SStefano Zampini PetscErrorCode ierr; 1594637a0070SStefano Zampini 1595637a0070SStefano Zampini PetscFunctionBegin; 1596637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1597637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1598637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1599637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1600637a0070SStefano Zampini } 1601637a0070SStefano Zampini 1602637a0070SStefano Zampini B = *newmat; 1603637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1604637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1605637a0070SStefano Zampini ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1606637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1607637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1608637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 16096718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1610637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 16116718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1612637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1613637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1614637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1615637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1616637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1617637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1618637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1619637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1620d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1621637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1622637a0070SStefano Zampini if (m->A) { 1623637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1624637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1625637a0070SStefano Zampini } 1626637a0070SStefano Zampini B->ops->bindtocpu = NULL; 1627637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_CPU; 1628637a0070SStefano Zampini PetscFunctionReturn(0); 1629637a0070SStefano Zampini } 1630637a0070SStefano Zampini 1631637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1632637a0070SStefano Zampini { 1633637a0070SStefano Zampini Mat B; 1634637a0070SStefano Zampini Mat_MPIDense *m; 1635637a0070SStefano Zampini PetscErrorCode ierr; 1636637a0070SStefano Zampini 1637637a0070SStefano Zampini PetscFunctionBegin; 1638637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1639637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1640637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1641637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1642637a0070SStefano Zampini } 1643637a0070SStefano Zampini 1644637a0070SStefano Zampini B = *newmat; 1645637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1646637a0070SStefano Zampini ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1647637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1648637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1649637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 16506718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1651637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 16526718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1653637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1654637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1655637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1656637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1657637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1658637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1659637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1660637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1661d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1662637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1663637a0070SStefano Zampini if (m->A) { 1664637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1665637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1666637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_BOTH; 1667637a0070SStefano Zampini } else { 1668637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1669637a0070SStefano Zampini } 1670637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1671637a0070SStefano Zampini 1672637a0070SStefano Zampini B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1673637a0070SStefano Zampini PetscFunctionReturn(0); 1674637a0070SStefano Zampini } 1675637a0070SStefano Zampini #endif 1676637a0070SStefano Zampini 16776947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 16786947451fSStefano Zampini { 16796947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16806947451fSStefano Zampini PetscErrorCode ierr; 16816947451fSStefano Zampini PetscInt lda; 16826947451fSStefano Zampini 16836947451fSStefano Zampini PetscFunctionBegin; 1684*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1685*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 16866947451fSStefano Zampini if (!a->cvec) { 16876947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 16886947451fSStefano Zampini } 16896947451fSStefano Zampini a->vecinuse = col + 1; 16906947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 16916947451fSStefano Zampini ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 16926947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 16936947451fSStefano Zampini *v = a->cvec; 16946947451fSStefano Zampini PetscFunctionReturn(0); 16956947451fSStefano Zampini } 16966947451fSStefano Zampini 16976947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 16986947451fSStefano Zampini { 16996947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17006947451fSStefano Zampini PetscErrorCode ierr; 17016947451fSStefano Zampini 17026947451fSStefano Zampini PetscFunctionBegin; 17035ea7661aSPierre Jolivet if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 17046947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 17056947451fSStefano Zampini a->vecinuse = 0; 17066947451fSStefano Zampini ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17076947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17086947451fSStefano Zampini *v = NULL; 17096947451fSStefano Zampini PetscFunctionReturn(0); 17106947451fSStefano Zampini } 17116947451fSStefano Zampini 17126947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 17136947451fSStefano Zampini { 17146947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17156947451fSStefano Zampini PetscErrorCode ierr; 17166947451fSStefano Zampini PetscInt lda; 17176947451fSStefano Zampini 17186947451fSStefano Zampini PetscFunctionBegin; 1719*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1720*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17216947451fSStefano Zampini if (!a->cvec) { 17226947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 17236947451fSStefano Zampini } 17246947451fSStefano Zampini a->vecinuse = col + 1; 17256947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 17266947451fSStefano Zampini ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 17276947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 17286947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 17296947451fSStefano Zampini *v = a->cvec; 17306947451fSStefano Zampini PetscFunctionReturn(0); 17316947451fSStefano Zampini } 17326947451fSStefano Zampini 17336947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 17346947451fSStefano Zampini { 17356947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17366947451fSStefano Zampini PetscErrorCode ierr; 17376947451fSStefano Zampini 17386947451fSStefano Zampini PetscFunctionBegin; 1739*616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1740*616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 17416947451fSStefano Zampini a->vecinuse = 0; 17426947451fSStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 17436947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 17446947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17456947451fSStefano Zampini *v = NULL; 17466947451fSStefano Zampini PetscFunctionReturn(0); 17476947451fSStefano Zampini } 17486947451fSStefano Zampini 17496947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17506947451fSStefano Zampini { 17516947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17526947451fSStefano Zampini PetscErrorCode ierr; 17536947451fSStefano Zampini PetscInt lda; 17546947451fSStefano Zampini 17556947451fSStefano Zampini PetscFunctionBegin; 1756*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1757*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17586947451fSStefano Zampini if (!a->cvec) { 17596947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 17606947451fSStefano Zampini } 17616947451fSStefano Zampini a->vecinuse = col + 1; 17626947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 17636947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17646947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 17656947451fSStefano Zampini *v = a->cvec; 17666947451fSStefano Zampini PetscFunctionReturn(0); 17676947451fSStefano Zampini } 17686947451fSStefano Zampini 17696947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17706947451fSStefano Zampini { 17716947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17726947451fSStefano Zampini PetscErrorCode ierr; 17736947451fSStefano Zampini 17746947451fSStefano Zampini PetscFunctionBegin; 1775*616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1776*616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 17776947451fSStefano Zampini a->vecinuse = 0; 17786947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17796947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17806947451fSStefano Zampini *v = NULL; 17816947451fSStefano Zampini PetscFunctionReturn(0); 17826947451fSStefano Zampini } 17836947451fSStefano Zampini 17845ea7661aSPierre Jolivet PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 17855ea7661aSPierre Jolivet { 17865ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1787*616b8fbbSStefano Zampini Mat_MPIDense *c; 17885ea7661aSPierre Jolivet PetscErrorCode ierr; 1789*616b8fbbSStefano Zampini MPI_Comm comm; 1790*616b8fbbSStefano Zampini PetscBool setup = PETSC_FALSE; 17915ea7661aSPierre Jolivet 17925ea7661aSPierre Jolivet PetscFunctionBegin; 1793*616b8fbbSStefano Zampini ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1794*616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1795*616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17965ea7661aSPierre Jolivet if (!a->cmat) { 1797*616b8fbbSStefano Zampini setup = PETSC_TRUE; 1798*616b8fbbSStefano Zampini ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1799*616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1800*616b8fbbSStefano Zampini ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1801*616b8fbbSStefano Zampini ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1802*616b8fbbSStefano Zampini ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1803*616b8fbbSStefano Zampini ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1804*616b8fbbSStefano Zampini } else if (cend-cbegin != a->cmat->cmap->N) { 1805*616b8fbbSStefano Zampini setup = PETSC_TRUE; 1806*616b8fbbSStefano Zampini ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1807*616b8fbbSStefano Zampini ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1808*616b8fbbSStefano Zampini ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1809*616b8fbbSStefano Zampini ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 18105ea7661aSPierre Jolivet } 1811*616b8fbbSStefano Zampini c = (Mat_MPIDense*)a->cmat->data; 1812*616b8fbbSStefano Zampini if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1813*616b8fbbSStefano Zampini ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1814*616b8fbbSStefano Zampini if (setup) { /* do we really need this? */ 1815*616b8fbbSStefano Zampini ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1816*616b8fbbSStefano Zampini } 1817*616b8fbbSStefano Zampini a->cmat->preallocated = PETSC_TRUE; 1818*616b8fbbSStefano Zampini a->cmat->assembled = PETSC_TRUE; 18195ea7661aSPierre Jolivet a->matinuse = cbegin + 1; 18205ea7661aSPierre Jolivet *v = a->cmat; 18215ea7661aSPierre Jolivet PetscFunctionReturn(0); 18225ea7661aSPierre Jolivet } 18235ea7661aSPierre Jolivet 18245ea7661aSPierre Jolivet PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 18255ea7661aSPierre Jolivet { 18265ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1827*616b8fbbSStefano Zampini Mat_MPIDense *c; 18285ea7661aSPierre Jolivet PetscErrorCode ierr; 18295ea7661aSPierre Jolivet 18305ea7661aSPierre Jolivet PetscFunctionBegin; 1831*616b8fbbSStefano Zampini if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1832*616b8fbbSStefano Zampini if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1833*616b8fbbSStefano Zampini if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 18345ea7661aSPierre Jolivet a->matinuse = 0; 1835*616b8fbbSStefano Zampini c = (Mat_MPIDense*)a->cmat->data; 1836*616b8fbbSStefano Zampini ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 18375ea7661aSPierre Jolivet *v = NULL; 18385ea7661aSPierre Jolivet PetscFunctionReturn(0); 18395ea7661aSPierre Jolivet } 18405ea7661aSPierre Jolivet 18418cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1842273d9f13SBarry Smith { 1843273d9f13SBarry Smith Mat_MPIDense *a; 1844dfbe8321SBarry Smith PetscErrorCode ierr; 1845273d9f13SBarry Smith 1846273d9f13SBarry Smith PetscFunctionBegin; 1847b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1848b0a32e0cSBarry Smith mat->data = (void*)a; 1849273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1850273d9f13SBarry Smith 1851273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1852273d9f13SBarry Smith 1853273d9f13SBarry Smith /* build cache for off array entries formed */ 1854273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 18552205254eSKarl Rupp 1856ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1857273d9f13SBarry Smith 1858273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1859273d9f13SBarry Smith a->lvec = 0; 1860273d9f13SBarry Smith a->Mvctx = 0; 1861273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1862273d9f13SBarry Smith 186349a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1864ad16ce7aSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1865bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 18668572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 18678572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 18688572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 18696947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 18706947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1871d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1872d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1873d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 18746947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 18756947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 18766947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 18776947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 18786947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 18796947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 18805ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 18815ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 18828baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 18838baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 18848baccfbdSHong Zhang #endif 1885637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1886637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1887637a0070SStefano Zampini #endif 1888bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 18894222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 18904222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 18916718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 18926718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 18936718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 18946718818eSStefano Zampini #endif 18958949adfdSHong Zhang 1896af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1897af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 189838aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1899273d9f13SBarry Smith PetscFunctionReturn(0); 1900273d9f13SBarry Smith } 1901273d9f13SBarry Smith 1902209238afSKris Buschelman /*MC 1903637a0070SStefano Zampini MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1904637a0070SStefano Zampini 1905637a0070SStefano Zampini Options Database Keys: 1906637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1907637a0070SStefano Zampini 1908637a0070SStefano Zampini Level: beginner 1909637a0070SStefano Zampini 1910637a0070SStefano Zampini .seealso: 1911637a0070SStefano Zampini 1912637a0070SStefano Zampini M*/ 1913637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1914637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1915637a0070SStefano Zampini { 1916637a0070SStefano Zampini PetscErrorCode ierr; 1917637a0070SStefano Zampini 1918637a0070SStefano Zampini PetscFunctionBegin; 1919637a0070SStefano Zampini ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1920637a0070SStefano Zampini ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1921637a0070SStefano Zampini PetscFunctionReturn(0); 1922637a0070SStefano Zampini } 1923637a0070SStefano Zampini #endif 1924637a0070SStefano Zampini 1925637a0070SStefano Zampini /*MC 1926002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1927209238afSKris Buschelman 1928209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1929209238afSKris Buschelman and MATMPIDENSE otherwise. 1930209238afSKris Buschelman 1931209238afSKris Buschelman Options Database Keys: 1932209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1933209238afSKris Buschelman 1934209238afSKris Buschelman Level: beginner 1935209238afSKris Buschelman 193601b82886SBarry Smith 19376947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 19386947451fSStefano Zampini M*/ 19396947451fSStefano Zampini 19406947451fSStefano Zampini /*MC 19416947451fSStefano Zampini MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 19426947451fSStefano Zampini 19436947451fSStefano Zampini This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 19446947451fSStefano Zampini and MATMPIDENSECUDA otherwise. 19456947451fSStefano Zampini 19466947451fSStefano Zampini Options Database Keys: 19476947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 19486947451fSStefano Zampini 19496947451fSStefano Zampini Level: beginner 19506947451fSStefano Zampini 19516947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1952209238afSKris Buschelman M*/ 1953209238afSKris Buschelman 1954273d9f13SBarry Smith /*@C 1955273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1956273d9f13SBarry Smith 1957273d9f13SBarry Smith Not collective 1958273d9f13SBarry Smith 1959273d9f13SBarry Smith Input Parameters: 19601c4f3114SJed Brown . B - the matrix 19610298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1962273d9f13SBarry Smith to control all matrix memory allocation. 1963273d9f13SBarry Smith 1964273d9f13SBarry Smith Notes: 1965273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1966273d9f13SBarry Smith storage by columns. 1967273d9f13SBarry Smith 1968273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1969273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 19700298fd71SBarry Smith set data=NULL. 1971273d9f13SBarry Smith 1972273d9f13SBarry Smith Level: intermediate 1973273d9f13SBarry Smith 1974273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1975273d9f13SBarry Smith @*/ 19761c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1977273d9f13SBarry Smith { 19784ac538c5SBarry Smith PetscErrorCode ierr; 1979273d9f13SBarry Smith 1980273d9f13SBarry Smith PetscFunctionBegin; 1981d5ea218eSStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 19821c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1983273d9f13SBarry Smith PetscFunctionReturn(0); 1984273d9f13SBarry Smith } 1985273d9f13SBarry Smith 1986d3042a70SBarry Smith /*@ 1987637a0070SStefano Zampini MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1988d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1989d3042a70SBarry Smith into a matrix 1990d3042a70SBarry Smith 1991d3042a70SBarry Smith Not Collective 1992d3042a70SBarry Smith 1993d3042a70SBarry Smith Input Parameters: 1994d3042a70SBarry Smith + mat - the matrix 1995d3042a70SBarry Smith - array - the array in column major order 1996d3042a70SBarry Smith 1997d3042a70SBarry Smith Notes: 1998d3042a70SBarry Smith You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1999d3042a70SBarry Smith freed when the matrix is destroyed. 2000d3042a70SBarry Smith 2001d3042a70SBarry Smith Level: developer 2002d3042a70SBarry Smith 2003d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2004d3042a70SBarry Smith 2005d3042a70SBarry Smith @*/ 2006637a0070SStefano Zampini PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2007d3042a70SBarry Smith { 2008d3042a70SBarry Smith PetscErrorCode ierr; 2009637a0070SStefano Zampini 2010d3042a70SBarry Smith PetscFunctionBegin; 2011d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2012d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2013d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2014637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2015637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 2016637a0070SStefano Zampini #endif 2017d3042a70SBarry Smith PetscFunctionReturn(0); 2018d3042a70SBarry Smith } 2019d3042a70SBarry Smith 2020d3042a70SBarry Smith /*@ 2021d3042a70SBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2022d3042a70SBarry Smith 2023d3042a70SBarry Smith Not Collective 2024d3042a70SBarry Smith 2025d3042a70SBarry Smith Input Parameters: 2026d3042a70SBarry Smith . mat - the matrix 2027d3042a70SBarry Smith 2028d3042a70SBarry Smith Notes: 2029d3042a70SBarry Smith You can only call this after a call to MatDensePlaceArray() 2030d3042a70SBarry Smith 2031d3042a70SBarry Smith Level: developer 2032d3042a70SBarry Smith 2033d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2034d3042a70SBarry Smith 2035d3042a70SBarry Smith @*/ 2036d3042a70SBarry Smith PetscErrorCode MatDenseResetArray(Mat mat) 2037d3042a70SBarry Smith { 2038d3042a70SBarry Smith PetscErrorCode ierr; 2039637a0070SStefano Zampini 2040d3042a70SBarry Smith PetscFunctionBegin; 2041d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2042d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2043d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2044d3042a70SBarry Smith PetscFunctionReturn(0); 2045d3042a70SBarry Smith } 2046d3042a70SBarry Smith 2047d5ea218eSStefano Zampini /*@ 2048d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2049d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2050d5ea218eSStefano Zampini into a matrix 2051d5ea218eSStefano Zampini 2052d5ea218eSStefano Zampini Not Collective 2053d5ea218eSStefano Zampini 2054d5ea218eSStefano Zampini Input Parameters: 2055d5ea218eSStefano Zampini + mat - the matrix 2056d5ea218eSStefano Zampini - array - the array in column major order 2057d5ea218eSStefano Zampini 2058d5ea218eSStefano Zampini Notes: 2059d5ea218eSStefano Zampini The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2060d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 2061d5ea218eSStefano Zampini 2062d5ea218eSStefano Zampini Level: developer 2063d5ea218eSStefano Zampini 2064d5ea218eSStefano Zampini .seealso: MatDenseGetArray(), VecReplaceArray() 2065d5ea218eSStefano Zampini @*/ 2066d5ea218eSStefano Zampini PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2067d5ea218eSStefano Zampini { 2068d5ea218eSStefano Zampini PetscErrorCode ierr; 2069d5ea218eSStefano Zampini 2070d5ea218eSStefano Zampini PetscFunctionBegin; 2071d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2072d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2073d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2074d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 2075d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 2076d5ea218eSStefano Zampini #endif 2077d5ea218eSStefano Zampini PetscFunctionReturn(0); 2078d5ea218eSStefano Zampini } 2079d5ea218eSStefano Zampini 2080637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 20818965ea79SLois Curfman McInnes /*@C 2082637a0070SStefano Zampini MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2083637a0070SStefano Zampini array provided by the user. This is useful to avoid copying an array 2084637a0070SStefano Zampini into a matrix 2085637a0070SStefano Zampini 2086637a0070SStefano Zampini Not Collective 2087637a0070SStefano Zampini 2088637a0070SStefano Zampini Input Parameters: 2089637a0070SStefano Zampini + mat - the matrix 2090637a0070SStefano Zampini - array - the array in column major order 2091637a0070SStefano Zampini 2092637a0070SStefano Zampini Notes: 2093637a0070SStefano Zampini You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2094637a0070SStefano Zampini freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2095637a0070SStefano Zampini 2096637a0070SStefano Zampini Level: developer 2097637a0070SStefano Zampini 2098637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2099637a0070SStefano Zampini @*/ 2100637a0070SStefano Zampini PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2101637a0070SStefano Zampini { 2102637a0070SStefano Zampini PetscErrorCode ierr; 2103637a0070SStefano Zampini 2104637a0070SStefano Zampini PetscFunctionBegin; 2105d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2106637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2107637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2108637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2109637a0070SStefano Zampini PetscFunctionReturn(0); 2110637a0070SStefano Zampini } 2111637a0070SStefano Zampini 2112637a0070SStefano Zampini /*@C 2113637a0070SStefano Zampini MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2114637a0070SStefano Zampini 2115637a0070SStefano Zampini Not Collective 2116637a0070SStefano Zampini 2117637a0070SStefano Zampini Input Parameters: 2118637a0070SStefano Zampini . mat - the matrix 2119637a0070SStefano Zampini 2120637a0070SStefano Zampini Notes: 2121637a0070SStefano Zampini You can only call this after a call to MatDenseCUDAPlaceArray() 2122637a0070SStefano Zampini 2123637a0070SStefano Zampini Level: developer 2124637a0070SStefano Zampini 2125637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2126637a0070SStefano Zampini 2127637a0070SStefano Zampini @*/ 2128637a0070SStefano Zampini PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2129637a0070SStefano Zampini { 2130637a0070SStefano Zampini PetscErrorCode ierr; 2131637a0070SStefano Zampini 2132637a0070SStefano Zampini PetscFunctionBegin; 2133d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2134637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2135637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2136637a0070SStefano Zampini PetscFunctionReturn(0); 2137637a0070SStefano Zampini } 2138637a0070SStefano Zampini 2139637a0070SStefano Zampini /*@C 2140d5ea218eSStefano Zampini MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2141d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2142d5ea218eSStefano Zampini into a matrix 2143d5ea218eSStefano Zampini 2144d5ea218eSStefano Zampini Not Collective 2145d5ea218eSStefano Zampini 2146d5ea218eSStefano Zampini Input Parameters: 2147d5ea218eSStefano Zampini + mat - the matrix 2148d5ea218eSStefano Zampini - array - the array in column major order 2149d5ea218eSStefano Zampini 2150d5ea218eSStefano Zampini Notes: 2151d5ea218eSStefano Zampini This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2152d5ea218eSStefano Zampini The memory passed in CANNOT be freed by the user. It will be freed 2153d5ea218eSStefano Zampini when the matrix is destroyed. The array should respect the matrix leading dimension. 2154d5ea218eSStefano Zampini 2155d5ea218eSStefano Zampini Level: developer 2156d5ea218eSStefano Zampini 2157d5ea218eSStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2158d5ea218eSStefano Zampini @*/ 2159d5ea218eSStefano Zampini PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2160d5ea218eSStefano Zampini { 2161d5ea218eSStefano Zampini PetscErrorCode ierr; 2162d5ea218eSStefano Zampini 2163d5ea218eSStefano Zampini PetscFunctionBegin; 2164d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2165d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2166d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2167d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2168d5ea218eSStefano Zampini PetscFunctionReturn(0); 2169d5ea218eSStefano Zampini } 2170d5ea218eSStefano Zampini 2171d5ea218eSStefano Zampini /*@C 2172637a0070SStefano Zampini MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2173637a0070SStefano Zampini 2174637a0070SStefano Zampini Not Collective 2175637a0070SStefano Zampini 2176637a0070SStefano Zampini Input Parameters: 2177637a0070SStefano Zampini . A - the matrix 2178637a0070SStefano Zampini 2179637a0070SStefano Zampini Output Parameters 2180637a0070SStefano Zampini . array - the GPU array in column major order 2181637a0070SStefano Zampini 2182637a0070SStefano Zampini Notes: 2183637a0070SStefano Zampini The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2184637a0070SStefano Zampini 2185637a0070SStefano Zampini Level: developer 2186637a0070SStefano Zampini 2187637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2188637a0070SStefano Zampini @*/ 2189637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2190637a0070SStefano Zampini { 2191637a0070SStefano Zampini PetscErrorCode ierr; 2192637a0070SStefano Zampini 2193637a0070SStefano Zampini PetscFunctionBegin; 2194d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2195637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2196637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2197637a0070SStefano Zampini PetscFunctionReturn(0); 2198637a0070SStefano Zampini } 2199637a0070SStefano Zampini 2200637a0070SStefano Zampini /*@C 2201637a0070SStefano Zampini MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2202637a0070SStefano Zampini 2203637a0070SStefano Zampini Not Collective 2204637a0070SStefano Zampini 2205637a0070SStefano Zampini Input Parameters: 2206637a0070SStefano Zampini + A - the matrix 2207637a0070SStefano Zampini - array - the GPU array in column major order 2208637a0070SStefano Zampini 2209637a0070SStefano Zampini Notes: 2210637a0070SStefano Zampini 2211637a0070SStefano Zampini Level: developer 2212637a0070SStefano Zampini 2213637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2214637a0070SStefano Zampini @*/ 2215637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2216637a0070SStefano Zampini { 2217637a0070SStefano Zampini PetscErrorCode ierr; 2218637a0070SStefano Zampini 2219637a0070SStefano Zampini PetscFunctionBegin; 2220d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2221637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2222637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2223637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2224637a0070SStefano Zampini PetscFunctionReturn(0); 2225637a0070SStefano Zampini } 2226637a0070SStefano Zampini 2227637a0070SStefano Zampini /*@C 2228637a0070SStefano Zampini MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2229637a0070SStefano Zampini 2230637a0070SStefano Zampini Not Collective 2231637a0070SStefano Zampini 2232637a0070SStefano Zampini Input Parameters: 2233637a0070SStefano Zampini . A - the matrix 2234637a0070SStefano Zampini 2235637a0070SStefano Zampini Output Parameters 2236637a0070SStefano Zampini . array - the GPU array in column major order 2237637a0070SStefano Zampini 2238637a0070SStefano Zampini Notes: 2239637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2240637a0070SStefano Zampini 2241637a0070SStefano Zampini Level: developer 2242637a0070SStefano Zampini 2243637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2244637a0070SStefano Zampini @*/ 2245637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2246637a0070SStefano Zampini { 2247637a0070SStefano Zampini PetscErrorCode ierr; 2248637a0070SStefano Zampini 2249637a0070SStefano Zampini PetscFunctionBegin; 2250d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2251637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2252637a0070SStefano Zampini PetscFunctionReturn(0); 2253637a0070SStefano Zampini } 2254637a0070SStefano Zampini 2255637a0070SStefano Zampini /*@C 2256637a0070SStefano Zampini MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2257637a0070SStefano Zampini 2258637a0070SStefano Zampini Not Collective 2259637a0070SStefano Zampini 2260637a0070SStefano Zampini Input Parameters: 2261637a0070SStefano Zampini + A - the matrix 2262637a0070SStefano Zampini - array - the GPU array in column major order 2263637a0070SStefano Zampini 2264637a0070SStefano Zampini Notes: 2265637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2266637a0070SStefano Zampini 2267637a0070SStefano Zampini Level: developer 2268637a0070SStefano Zampini 2269637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2270637a0070SStefano Zampini @*/ 2271637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2272637a0070SStefano Zampini { 2273637a0070SStefano Zampini PetscErrorCode ierr; 2274637a0070SStefano Zampini 2275637a0070SStefano Zampini PetscFunctionBegin; 2276637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2277637a0070SStefano Zampini PetscFunctionReturn(0); 2278637a0070SStefano Zampini } 2279637a0070SStefano Zampini 2280637a0070SStefano Zampini /*@C 2281637a0070SStefano Zampini MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2282637a0070SStefano Zampini 2283637a0070SStefano Zampini Not Collective 2284637a0070SStefano Zampini 2285637a0070SStefano Zampini Input Parameters: 2286637a0070SStefano Zampini . A - the matrix 2287637a0070SStefano Zampini 2288637a0070SStefano Zampini Output Parameters 2289637a0070SStefano Zampini . array - the GPU array in column major order 2290637a0070SStefano Zampini 2291637a0070SStefano Zampini Notes: 2292637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2293637a0070SStefano Zampini 2294637a0070SStefano Zampini Level: developer 2295637a0070SStefano Zampini 2296637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2297637a0070SStefano Zampini @*/ 2298637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2299637a0070SStefano Zampini { 2300637a0070SStefano Zampini PetscErrorCode ierr; 2301637a0070SStefano Zampini 2302637a0070SStefano Zampini PetscFunctionBegin; 2303d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2304637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2305637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2306637a0070SStefano Zampini PetscFunctionReturn(0); 2307637a0070SStefano Zampini } 2308637a0070SStefano Zampini 2309637a0070SStefano Zampini /*@C 2310637a0070SStefano Zampini MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2311637a0070SStefano Zampini 2312637a0070SStefano Zampini Not Collective 2313637a0070SStefano Zampini 2314637a0070SStefano Zampini Input Parameters: 2315637a0070SStefano Zampini + A - the matrix 2316637a0070SStefano Zampini - array - the GPU array in column major order 2317637a0070SStefano Zampini 2318637a0070SStefano Zampini Notes: 2319637a0070SStefano Zampini 2320637a0070SStefano Zampini Level: developer 2321637a0070SStefano Zampini 2322637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2323637a0070SStefano Zampini @*/ 2324637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2325637a0070SStefano Zampini { 2326637a0070SStefano Zampini PetscErrorCode ierr; 2327637a0070SStefano Zampini 2328637a0070SStefano Zampini PetscFunctionBegin; 2329d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2330637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2331637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2332637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2333637a0070SStefano Zampini PetscFunctionReturn(0); 2334637a0070SStefano Zampini } 2335637a0070SStefano Zampini #endif 2336637a0070SStefano Zampini 2337637a0070SStefano Zampini /*@C 2338637a0070SStefano Zampini MatCreateDense - Creates a matrix in dense format. 23398965ea79SLois Curfman McInnes 2340d083f849SBarry Smith Collective 2341db81eaa0SLois Curfman McInnes 23428965ea79SLois Curfman McInnes Input Parameters: 2343db81eaa0SLois Curfman McInnes + comm - MPI communicator 23448965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2345db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 23468965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2347db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 23486cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2349dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 23508965ea79SLois Curfman McInnes 23518965ea79SLois Curfman McInnes Output Parameter: 2352477f1c0bSLois Curfman McInnes . A - the matrix 23538965ea79SLois Curfman McInnes 2354b259b22eSLois Curfman McInnes Notes: 235539ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 235639ddd567SLois Curfman McInnes storage by columns. 23578965ea79SLois Curfman McInnes 235818f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 235918f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 23606cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 236118f449edSLois Curfman McInnes 23628965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 23638965ea79SLois Curfman McInnes (possibly both). 23648965ea79SLois Curfman McInnes 2365027ccd11SLois Curfman McInnes Level: intermediate 2366027ccd11SLois Curfman McInnes 236739ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 23688965ea79SLois Curfman McInnes @*/ 236969b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 23708965ea79SLois Curfman McInnes { 23716849ba73SBarry Smith PetscErrorCode ierr; 237213f74950SBarry Smith PetscMPIInt size; 23738965ea79SLois Curfman McInnes 23743a40ed3dSBarry Smith PetscFunctionBegin; 2375f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 23768491ab44SLisandro Dalcin PetscValidLogicalCollectiveBool(*A,!!data,6); 2377f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2378273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2379273d9f13SBarry Smith if (size > 1) { 2380273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2381273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 23826cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 23836cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 23846cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 23856cfe35ddSJose E. Roman } 2386273d9f13SBarry Smith } else { 2387273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2388273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 23898c469469SLois Curfman McInnes } 23903a40ed3dSBarry Smith PetscFunctionReturn(0); 23918965ea79SLois Curfman McInnes } 23928965ea79SLois Curfman McInnes 2393637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2394637a0070SStefano Zampini /*@C 2395637a0070SStefano Zampini MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2396637a0070SStefano Zampini 2397637a0070SStefano Zampini Collective 2398637a0070SStefano Zampini 2399637a0070SStefano Zampini Input Parameters: 2400637a0070SStefano Zampini + comm - MPI communicator 2401637a0070SStefano Zampini . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2402637a0070SStefano Zampini . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2403637a0070SStefano Zampini . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2404637a0070SStefano Zampini . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2405637a0070SStefano Zampini - data - optional location of GPU matrix data. Set data=NULL for PETSc 2406637a0070SStefano Zampini to control matrix memory allocation. 2407637a0070SStefano Zampini 2408637a0070SStefano Zampini Output Parameter: 2409637a0070SStefano Zampini . A - the matrix 2410637a0070SStefano Zampini 2411637a0070SStefano Zampini Notes: 2412637a0070SStefano Zampini 2413637a0070SStefano Zampini Level: intermediate 2414637a0070SStefano Zampini 2415637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense() 2416637a0070SStefano Zampini @*/ 2417637a0070SStefano Zampini PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2418637a0070SStefano Zampini { 2419637a0070SStefano Zampini PetscErrorCode ierr; 2420637a0070SStefano Zampini PetscMPIInt size; 2421637a0070SStefano Zampini 2422637a0070SStefano Zampini PetscFunctionBegin; 2423637a0070SStefano Zampini ierr = MatCreate(comm,A);CHKERRQ(ierr); 2424637a0070SStefano Zampini PetscValidLogicalCollectiveBool(*A,!!data,6); 2425637a0070SStefano Zampini ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2426637a0070SStefano Zampini ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2427637a0070SStefano Zampini if (size > 1) { 2428637a0070SStefano Zampini ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2429637a0070SStefano Zampini ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2430637a0070SStefano Zampini if (data) { /* user provided data array, so no need to assemble */ 2431637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2432637a0070SStefano Zampini (*A)->assembled = PETSC_TRUE; 2433637a0070SStefano Zampini } 2434637a0070SStefano Zampini } else { 2435637a0070SStefano Zampini ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2436637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2437637a0070SStefano Zampini } 2438637a0070SStefano Zampini PetscFunctionReturn(0); 2439637a0070SStefano Zampini } 2440637a0070SStefano Zampini #endif 2441637a0070SStefano Zampini 24426849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 24438965ea79SLois Curfman McInnes { 24448965ea79SLois Curfman McInnes Mat mat; 24453501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2446dfbe8321SBarry Smith PetscErrorCode ierr; 24478965ea79SLois Curfman McInnes 24483a40ed3dSBarry Smith PetscFunctionBegin; 24498965ea79SLois Curfman McInnes *newmat = 0; 2450ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2451d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 24527adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2453834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 24545aa7edbeSHong Zhang 2455d5f3da31SBarry Smith mat->factortype = A->factortype; 2456c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2457273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 24588965ea79SLois Curfman McInnes 2459e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 24603782ba37SSatish Balay a->donotstash = oldmat->donotstash; 2461e04c1aa4SHong Zhang 24621e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 24631e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 24648965ea79SLois Curfman McInnes 24655609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 24663bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2467637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 246801b82886SBarry Smith 24698965ea79SLois Curfman McInnes *newmat = mat; 24703a40ed3dSBarry Smith PetscFunctionReturn(0); 24718965ea79SLois Curfman McInnes } 24728965ea79SLois Curfman McInnes 2473eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2474eb91f321SVaclav Hapla { 2475eb91f321SVaclav Hapla PetscErrorCode ierr; 247687d5ce66SSatish Balay PetscBool isbinary; 247787d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 247887d5ce66SSatish Balay PetscBool ishdf5; 247987d5ce66SSatish Balay #endif 2480eb91f321SVaclav Hapla 2481eb91f321SVaclav Hapla PetscFunctionBegin; 2482eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2483eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2484eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 2485eb91f321SVaclav Hapla ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2486eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 248787d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 2488eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 248987d5ce66SSatish Balay #endif 2490eb91f321SVaclav Hapla if (isbinary) { 24918491ab44SLisandro Dalcin ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2492eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 249387d5ce66SSatish Balay } else if (ishdf5) { 2494eb91f321SVaclav Hapla ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2495eb91f321SVaclav Hapla #endif 249687d5ce66SSatish Balay } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2497eb91f321SVaclav Hapla PetscFunctionReturn(0); 2498eb91f321SVaclav Hapla } 2499eb91f321SVaclav Hapla 25006718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 25016e4ee0c6SHong Zhang { 25026e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 25036e4ee0c6SHong Zhang Mat a,b; 2504ace3abfcSBarry Smith PetscBool flg; 25056e4ee0c6SHong Zhang PetscErrorCode ierr; 250690ace30eSBarry Smith 25076e4ee0c6SHong Zhang PetscFunctionBegin; 25086e4ee0c6SHong Zhang a = matA->A; 25096e4ee0c6SHong Zhang b = matB->A; 25106e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2511b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 25126e4ee0c6SHong Zhang PetscFunctionReturn(0); 25136e4ee0c6SHong Zhang } 251490ace30eSBarry Smith 25156718818eSStefano Zampini PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2516baa3c1c6SHong Zhang { 2517baa3c1c6SHong Zhang PetscErrorCode ierr; 25186718818eSStefano Zampini Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2519baa3c1c6SHong Zhang 2520baa3c1c6SHong Zhang PetscFunctionBegin; 2521637a0070SStefano Zampini ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2522637a0070SStefano Zampini ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2523baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 2524baa3c1c6SHong Zhang PetscFunctionReturn(0); 2525baa3c1c6SHong Zhang } 2526baa3c1c6SHong Zhang 25276718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2528cc48ffa7SToby Isaac { 2529cc48ffa7SToby Isaac PetscErrorCode ierr; 25306718818eSStefano Zampini Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2531cc48ffa7SToby Isaac 2532cc48ffa7SToby Isaac PetscFunctionBegin; 2533cc48ffa7SToby Isaac ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2534faa55883SToby Isaac ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2535cc48ffa7SToby Isaac ierr = PetscFree(abt);CHKERRQ(ierr); 2536cc48ffa7SToby Isaac PetscFunctionReturn(0); 2537cc48ffa7SToby Isaac } 2538cc48ffa7SToby Isaac 25396718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2540cb20be35SHong Zhang { 2541baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 25426718818eSStefano Zampini Mat_TransMatMultDense *atb; 2543cb20be35SHong Zhang PetscErrorCode ierr; 2544cb20be35SHong Zhang MPI_Comm comm; 25456718818eSStefano Zampini PetscMPIInt size,*recvcounts; 25466718818eSStefano Zampini PetscScalar *carray,*sendbuf; 2547637a0070SStefano Zampini const PetscScalar *atbarray; 2548d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2549e68c0b26SHong Zhang const PetscInt *ranges; 2550cb20be35SHong Zhang 2551cb20be35SHong Zhang PetscFunctionBegin; 25526718818eSStefano Zampini MatCheckProduct(C,3); 25536718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 25546718818eSStefano Zampini atb = (Mat_TransMatMultDense *)C->product->data; 25556718818eSStefano Zampini recvcounts = atb->recvcounts; 25566718818eSStefano Zampini sendbuf = atb->sendbuf; 25576718818eSStefano Zampini 2558cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2559cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2560e68c0b26SHong Zhang 2561c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 2562637a0070SStefano Zampini ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2563cb20be35SHong Zhang 2564cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2565cb20be35SHong Zhang 2566660d5466SHong Zhang /* arrange atbarray into sendbuf */ 2567637a0070SStefano Zampini ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2568637a0070SStefano Zampini for (proc=0, k=0; proc<size; proc++) { 2569baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 2570c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2571cb20be35SHong Zhang } 2572cb20be35SHong Zhang } 2573637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2574637a0070SStefano Zampini 2575c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 25760acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 25773462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 25780acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 25790acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25800acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2581cb20be35SHong Zhang PetscFunctionReturn(0); 2582cb20be35SHong Zhang } 2583cb20be35SHong Zhang 25846718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2585cb20be35SHong Zhang { 2586cb20be35SHong Zhang PetscErrorCode ierr; 2587cb20be35SHong Zhang MPI_Comm comm; 2588baa3c1c6SHong Zhang PetscMPIInt size; 2589660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2590baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 25916718818eSStefano Zampini PetscBool cisdense; 25920acaeb71SStefano Zampini PetscInt i; 25930acaeb71SStefano Zampini const PetscInt *ranges; 2594cb20be35SHong Zhang 2595cb20be35SHong Zhang PetscFunctionBegin; 25966718818eSStefano Zampini MatCheckProduct(C,3); 25976718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2598baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2599cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2600cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2601cb20be35SHong Zhang } 2602cb20be35SHong Zhang 26034222ddf1SHong Zhang /* create matrix product C */ 26046718818eSStefano Zampini ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 26056718818eSStefano Zampini ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 26066718818eSStefano Zampini if (!cisdense) { 26076718818eSStefano Zampini ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 26086718818eSStefano Zampini } 260918992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 2610baa3c1c6SHong Zhang 26114222ddf1SHong Zhang /* create data structure for reuse C */ 2612baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2613baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 26144222ddf1SHong Zhang cM = C->rmap->N; 26156718818eSStefano Zampini ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 26160acaeb71SStefano Zampini ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 26170acaeb71SStefano Zampini for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2618baa3c1c6SHong Zhang 26196718818eSStefano Zampini C->product->data = atb; 26206718818eSStefano Zampini C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2621cb20be35SHong Zhang PetscFunctionReturn(0); 2622cb20be35SHong Zhang } 2623cb20be35SHong Zhang 26244222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2625cb20be35SHong Zhang { 2626cb20be35SHong Zhang PetscErrorCode ierr; 2627cc48ffa7SToby Isaac MPI_Comm comm; 2628cc48ffa7SToby Isaac PetscMPIInt i, size; 2629cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2630cc48ffa7SToby Isaac PetscMPIInt tag; 26314222ddf1SHong Zhang PetscInt alg; 2632cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 26334222ddf1SHong Zhang Mat_Product *product = C->product; 26344222ddf1SHong Zhang PetscBool flg; 2635cc48ffa7SToby Isaac 2636cc48ffa7SToby Isaac PetscFunctionBegin; 26376718818eSStefano Zampini MatCheckProduct(C,4); 26386718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 26394222ddf1SHong Zhang /* check local size of A and B */ 2640637a0070SStefano Zampini if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2641cc48ffa7SToby Isaac 26424222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2643637a0070SStefano Zampini alg = flg ? 0 : 1; 2644cc48ffa7SToby Isaac 26454222ddf1SHong Zhang /* setup matrix product C */ 26464222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 26474222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 264818992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 26494222ddf1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 26504222ddf1SHong Zhang 26514222ddf1SHong Zhang /* create data structure for reuse C */ 26524222ddf1SHong Zhang ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2653cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2654cc48ffa7SToby Isaac ierr = PetscNew(&abt);CHKERRQ(ierr); 2655cc48ffa7SToby Isaac abt->tag = tag; 2656faa55883SToby Isaac abt->alg = alg; 2657faa55883SToby Isaac switch (alg) { 26584222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2659cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2660cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2661cc48ffa7SToby Isaac ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2662faa55883SToby Isaac break; 26634222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2664faa55883SToby Isaac ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2665faa55883SToby Isaac ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2666faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2667faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2668faa55883SToby Isaac break; 2669faa55883SToby Isaac } 2670cc48ffa7SToby Isaac 26716718818eSStefano Zampini C->product->data = abt; 26726718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 26734222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2674cc48ffa7SToby Isaac PetscFunctionReturn(0); 2675cc48ffa7SToby Isaac } 2676cc48ffa7SToby Isaac 2677faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2678cc48ffa7SToby Isaac { 2679cc48ffa7SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 26806718818eSStefano Zampini Mat_MatTransMultDense *abt; 2681cc48ffa7SToby Isaac PetscErrorCode ierr; 2682cc48ffa7SToby Isaac MPI_Comm comm; 2683cc48ffa7SToby Isaac PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2684637a0070SStefano Zampini PetscScalar *sendbuf, *recvbuf=0, *cv; 2685cc48ffa7SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2686cc48ffa7SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2687637a0070SStefano Zampini const PetscScalar *av,*bv; 2688637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, blda, clda; 2689cc48ffa7SToby Isaac MPI_Request reqs[2]; 2690cc48ffa7SToby Isaac const PetscInt *ranges; 2691cc48ffa7SToby Isaac 2692cc48ffa7SToby Isaac PetscFunctionBegin; 26936718818eSStefano Zampini MatCheckProduct(C,3); 26946718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 26956718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 26966718818eSStefano Zampini ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2697cc48ffa7SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2698cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2699637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2700637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 27010acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2702637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2703637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2704637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2705637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2706637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2707637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2708cc48ffa7SToby Isaac ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2709cc48ffa7SToby Isaac bn = B->rmap->n; 2710637a0070SStefano Zampini if (blda == bn) { 2711637a0070SStefano Zampini sendbuf = (PetscScalar*)bv; 2712cc48ffa7SToby Isaac } else { 2713cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2714cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2715cc48ffa7SToby Isaac for (j = 0; j < bn; j++, k++) { 2716637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2717cc48ffa7SToby Isaac } 2718cc48ffa7SToby Isaac } 2719cc48ffa7SToby Isaac } 2720cc48ffa7SToby Isaac if (size > 1) { 2721cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2722cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2723cc48ffa7SToby Isaac } else { 2724cc48ffa7SToby Isaac sendto = recvfrom = 0; 2725cc48ffa7SToby Isaac } 2726cc48ffa7SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2727cc48ffa7SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2728cc48ffa7SToby Isaac recvisfrom = rank; 2729cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2730cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2731cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2732cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2733cc48ffa7SToby Isaac 2734cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2735cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2736cc48ffa7SToby Isaac sendsiz = cK * bn; 2737cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2738cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2739cc48ffa7SToby Isaac ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2740cc48ffa7SToby Isaac ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2741cc48ffa7SToby Isaac } 2742cc48ffa7SToby Isaac 2743cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 2744cc48ffa7SToby Isaac ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 274550ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2746cc48ffa7SToby Isaac 2747cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2748cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2749cc48ffa7SToby Isaac ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2750cc48ffa7SToby Isaac } 2751cc48ffa7SToby Isaac bn = nextbn; 2752cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2753cc48ffa7SToby Isaac sendbuf = recvbuf; 2754cc48ffa7SToby Isaac } 2755637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2756637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 27570acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 27580acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27590acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760cc48ffa7SToby Isaac PetscFunctionReturn(0); 2761cc48ffa7SToby Isaac } 2762cc48ffa7SToby Isaac 2763faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2764faa55883SToby Isaac { 2765faa55883SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 27666718818eSStefano Zampini Mat_MatTransMultDense *abt; 2767faa55883SToby Isaac PetscErrorCode ierr; 2768faa55883SToby Isaac MPI_Comm comm; 2769637a0070SStefano Zampini PetscMPIInt size; 2770637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2771637a0070SStefano Zampini const PetscScalar *av,*bv; 2772637a0070SStefano Zampini PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2773faa55883SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2774637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2775faa55883SToby Isaac 2776faa55883SToby Isaac PetscFunctionBegin; 27776718818eSStefano Zampini MatCheckProduct(C,3); 27786718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 27796718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 2780faa55883SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2781faa55883SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2782637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2783637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 27840acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2785637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2786637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2787637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2788637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2789637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2790faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2791faa55883SToby Isaac bn = B->rmap->n; 2792faa55883SToby Isaac sendbuf = abt->buf[0]; 2793faa55883SToby Isaac recvbuf = abt->buf[1]; 2794faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2795faa55883SToby Isaac for (i = 0; i < cK; i++, k++) { 2796637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2797faa55883SToby Isaac } 2798faa55883SToby Isaac } 2799637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2800faa55883SToby Isaac ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2801faa55883SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2802faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2803faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 280450ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2805637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2806637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 28070acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 28080acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28090acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810faa55883SToby Isaac PetscFunctionReturn(0); 2811faa55883SToby Isaac } 2812faa55883SToby Isaac 2813faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2814faa55883SToby Isaac { 28156718818eSStefano Zampini Mat_MatTransMultDense *abt; 2816faa55883SToby Isaac PetscErrorCode ierr; 2817faa55883SToby Isaac 2818faa55883SToby Isaac PetscFunctionBegin; 28196718818eSStefano Zampini MatCheckProduct(C,3); 28206718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 28216718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 2822faa55883SToby Isaac switch (abt->alg) { 2823faa55883SToby Isaac case 1: 2824faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2825faa55883SToby Isaac break; 2826faa55883SToby Isaac default: 2827faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2828faa55883SToby Isaac break; 2829faa55883SToby Isaac } 2830faa55883SToby Isaac PetscFunctionReturn(0); 2831faa55883SToby Isaac } 2832faa55883SToby Isaac 28336718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2834320f2790SHong Zhang { 2835320f2790SHong Zhang PetscErrorCode ierr; 28366718818eSStefano Zampini Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2837320f2790SHong Zhang 2838320f2790SHong Zhang PetscFunctionBegin; 2839320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2840320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2841320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2842320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 2843320f2790SHong Zhang PetscFunctionReturn(0); 2844320f2790SHong Zhang } 2845320f2790SHong Zhang 28465242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 2847320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2848320f2790SHong Zhang { 2849320f2790SHong Zhang PetscErrorCode ierr; 28506718818eSStefano Zampini Mat_MatMultDense *ab; 2851320f2790SHong Zhang 2852320f2790SHong Zhang PetscFunctionBegin; 28536718818eSStefano Zampini MatCheckProduct(C,3); 28546718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 28556718818eSStefano Zampini ab = (Mat_MatMultDense*)C->product->data; 2856de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2857de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 28584222ddf1SHong Zhang ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2859de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2860320f2790SHong Zhang PetscFunctionReturn(0); 2861320f2790SHong Zhang } 2862320f2790SHong Zhang 28636718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2864320f2790SHong Zhang { 2865320f2790SHong Zhang PetscErrorCode ierr; 2866320f2790SHong Zhang Mat Ae,Be,Ce; 2867320f2790SHong Zhang Mat_MatMultDense *ab; 2868320f2790SHong Zhang 2869320f2790SHong Zhang PetscFunctionBegin; 28706718818eSStefano Zampini MatCheckProduct(C,4); 28716718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 28724222ddf1SHong Zhang /* check local size of A and B */ 2873320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2874378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2875320f2790SHong Zhang } 2876320f2790SHong Zhang 28774222ddf1SHong Zhang /* create elemental matrices Ae and Be */ 28784222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 28794222ddf1SHong Zhang ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 28804222ddf1SHong Zhang ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 28814222ddf1SHong Zhang ierr = MatSetUp(Ae);CHKERRQ(ierr); 28824222ddf1SHong Zhang ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2883320f2790SHong Zhang 28844222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 28854222ddf1SHong Zhang ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 28864222ddf1SHong Zhang ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 28874222ddf1SHong Zhang ierr = MatSetUp(Be);CHKERRQ(ierr); 28884222ddf1SHong Zhang ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2889320f2790SHong Zhang 28904222ddf1SHong Zhang /* compute symbolic Ce = Ae*Be */ 28914222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 28924222ddf1SHong Zhang ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 28934222ddf1SHong Zhang 28944222ddf1SHong Zhang /* setup C */ 28954222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 28964222ddf1SHong Zhang ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 28974222ddf1SHong Zhang ierr = MatSetUp(C);CHKERRQ(ierr); 2898320f2790SHong Zhang 2899320f2790SHong Zhang /* create data structure for reuse Cdense */ 2900320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 2901320f2790SHong Zhang ab->Ae = Ae; 2902320f2790SHong Zhang ab->Be = Be; 2903320f2790SHong Zhang ab->Ce = Ce; 29046718818eSStefano Zampini 29056718818eSStefano Zampini C->product->data = ab; 29066718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 29074222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2908320f2790SHong Zhang PetscFunctionReturn(0); 2909320f2790SHong Zhang } 29104222ddf1SHong Zhang #endif 29114222ddf1SHong Zhang /* ----------------------------------------------- */ 29124222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 29134222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2914320f2790SHong Zhang { 2915320f2790SHong Zhang PetscFunctionBegin; 29164222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 29174222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 2918320f2790SHong Zhang PetscFunctionReturn(0); 2919320f2790SHong Zhang } 29205242a7b1SHong Zhang #endif 292186aefd0dSHong Zhang 29224222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 29234222ddf1SHong Zhang { 29244222ddf1SHong Zhang Mat_Product *product = C->product; 29254222ddf1SHong Zhang Mat A = product->A,B=product->B; 29264222ddf1SHong Zhang 29274222ddf1SHong Zhang PetscFunctionBegin; 29284222ddf1SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 29294222ddf1SHong Zhang SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 29306718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 29316718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_AtB; 29324222ddf1SHong Zhang PetscFunctionReturn(0); 29334222ddf1SHong Zhang } 29344222ddf1SHong Zhang 29354222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 29364222ddf1SHong Zhang { 29374222ddf1SHong Zhang PetscErrorCode ierr; 29384222ddf1SHong Zhang Mat_Product *product = C->product; 29394222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv","cyclic"}; 29404222ddf1SHong Zhang PetscInt alg,nalg = 2; 29414222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 29424222ddf1SHong Zhang 29434222ddf1SHong Zhang PetscFunctionBegin; 29444222ddf1SHong Zhang /* Set default algorithm */ 29454222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 29464222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 29474222ddf1SHong Zhang if (flg) { 29484222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29494222ddf1SHong Zhang } 29504222ddf1SHong Zhang 29514222ddf1SHong Zhang /* Get runtime option */ 29524222ddf1SHong Zhang if (product->api_user) { 29534222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 29544222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 29554222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 29564222ddf1SHong Zhang } else { 29574222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 29584222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 29594222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 29604222ddf1SHong Zhang } 29614222ddf1SHong Zhang if (flg) { 29624222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29634222ddf1SHong Zhang } 29644222ddf1SHong Zhang 29654222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 29664222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 29674222ddf1SHong Zhang PetscFunctionReturn(0); 29684222ddf1SHong Zhang } 29694222ddf1SHong Zhang 29704222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 29714222ddf1SHong Zhang { 29724222ddf1SHong Zhang PetscErrorCode ierr; 29734222ddf1SHong Zhang Mat_Product *product = C->product; 29744222ddf1SHong Zhang 29754222ddf1SHong Zhang PetscFunctionBegin; 29764222ddf1SHong Zhang switch (product->type) { 29774222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 29784222ddf1SHong Zhang case MATPRODUCT_AB: 29794222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 29804222ddf1SHong Zhang break; 29814222ddf1SHong Zhang #endif 29824222ddf1SHong Zhang case MATPRODUCT_AtB: 29834222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 29844222ddf1SHong Zhang break; 29854222ddf1SHong Zhang case MATPRODUCT_ABt: 29864222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 29874222ddf1SHong Zhang break; 29886718818eSStefano Zampini default: 29896718818eSStefano Zampini break; 29904222ddf1SHong Zhang } 29914222ddf1SHong Zhang PetscFunctionReturn(0); 29924222ddf1SHong Zhang } 2993