1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 8baa3c1c6SHong Zhang #include <petscblaslapack.h> 98965ea79SLois Curfman McInnes 10ab92ecdeSBarry Smith /*@ 11ab92ecdeSBarry Smith 12ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 16ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 23ab92ecdeSBarry Smith @*/ 24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25ab92ecdeSBarry Smith { 26ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27ab92ecdeSBarry Smith PetscErrorCode ierr; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32d5ea218eSStefano Zampini PetscValidPointer(B,2); 33251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 342205254eSKarl Rupp if (flg) *B = mat->A; 352205254eSKarl Rupp else *B = A; 36ab92ecdeSBarry Smith PetscFunctionReturn(0); 37ab92ecdeSBarry Smith } 38ab92ecdeSBarry Smith 39ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40ba8c8a56SBarry Smith { 41ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42ba8c8a56SBarry Smith PetscErrorCode ierr; 43d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44ba8c8a56SBarry Smith 45ba8c8a56SBarry Smith PetscFunctionBegin; 46e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47ba8c8a56SBarry Smith lrow = row - rstart; 48ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49ba8c8a56SBarry Smith PetscFunctionReturn(0); 50ba8c8a56SBarry Smith } 51ba8c8a56SBarry Smith 52637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53ba8c8a56SBarry Smith { 54637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55ba8c8a56SBarry Smith PetscErrorCode ierr; 56637a0070SStefano Zampini PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57ba8c8a56SBarry Smith 58ba8c8a56SBarry Smith PetscFunctionBegin; 59637a0070SStefano Zampini if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60637a0070SStefano Zampini lrow = row - rstart; 61637a0070SStefano Zampini ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62ba8c8a56SBarry Smith PetscFunctionReturn(0); 63ba8c8a56SBarry Smith } 64ba8c8a56SBarry Smith 6511bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 660de54da6SSatish Balay { 670de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 686849ba73SBarry Smith PetscErrorCode ierr; 69d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 7087828ca2SBarry Smith PetscScalar *array; 710de54da6SSatish Balay MPI_Comm comm; 724b6ae80fSStefano Zampini PetscBool flg; 7311bd1e4dSLisandro Dalcin Mat B; 740de54da6SSatish Balay 750de54da6SSatish Balay PetscFunctionBegin; 764b6ae80fSStefano Zampini ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 774b6ae80fSStefano Zampini if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 7811bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79616b8fbbSStefano Zampini if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 804b6ae80fSStefano Zampini 814b6ae80fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 824b6ae80fSStefano Zampini if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 830de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 8411bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 8611bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 874b6ae80fSStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 8811bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 894b6ae80fSStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 9011bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9111bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9211bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 9311bd1e4dSLisandro Dalcin *a = B; 9411bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 952205254eSKarl Rupp } else *a = B; 960de54da6SSatish Balay PetscFunctionReturn(0); 970de54da6SSatish Balay } 980de54da6SSatish Balay 9913f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 1008965ea79SLois Curfman McInnes { 10139b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102dfbe8321SBarry Smith PetscErrorCode ierr; 103d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1058965ea79SLois Curfman McInnes 1063a40ed3dSBarry Smith PetscFunctionBegin; 1078965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1085ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 109e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1108965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1118965ea79SLois Curfman McInnes row = idxm[i] - rstart; 11239b7565bSBarry Smith if (roworiented) { 11339b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1143a40ed3dSBarry Smith } else { 1158965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1165ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 117e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11839b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11939b7565bSBarry Smith } 1208965ea79SLois Curfman McInnes } 1212205254eSKarl Rupp } else if (!A->donotstash) { 1225080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 12339b7565bSBarry Smith if (roworiented) { 124b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125d36fbae8SSatish Balay } else { 126b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 12739b7565bSBarry Smith } 128b49de8d1SLois Curfman McInnes } 129b49de8d1SLois Curfman McInnes } 1303a40ed3dSBarry Smith PetscFunctionReturn(0); 131b49de8d1SLois Curfman McInnes } 132b49de8d1SLois Curfman McInnes 13313f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134b49de8d1SLois Curfman McInnes { 135b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136dfbe8321SBarry Smith PetscErrorCode ierr; 137d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138b49de8d1SLois Curfman McInnes 1393a40ed3dSBarry Smith PetscFunctionBegin; 140b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 141e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 144b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 145b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 146e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149b49de8d1SLois Curfman McInnes } 150e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1518965ea79SLois Curfman McInnes } 1523a40ed3dSBarry Smith PetscFunctionReturn(0); 1538965ea79SLois Curfman McInnes } 1548965ea79SLois Curfman McInnes 15549a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 15649a6ff4bSBarry Smith { 15749a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 15849a6ff4bSBarry Smith PetscErrorCode ierr; 15949a6ff4bSBarry Smith 16049a6ff4bSBarry Smith PetscFunctionBegin; 16149a6ff4bSBarry Smith ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 16249a6ff4bSBarry Smith PetscFunctionReturn(0); 16349a6ff4bSBarry Smith } 16449a6ff4bSBarry Smith 165ad16ce7aSStefano Zampini static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166ad16ce7aSStefano Zampini { 167ad16ce7aSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168ad16ce7aSStefano Zampini PetscErrorCode ierr; 169ad16ce7aSStefano Zampini 170ad16ce7aSStefano Zampini PetscFunctionBegin; 171ad16ce7aSStefano Zampini ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172ad16ce7aSStefano Zampini PetscFunctionReturn(0); 173ad16ce7aSStefano Zampini } 174ad16ce7aSStefano Zampini 175637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176ff14e315SSatish Balay { 177ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178dfbe8321SBarry Smith PetscErrorCode ierr; 179ff14e315SSatish Balay 1803a40ed3dSBarry Smith PetscFunctionBegin; 181616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1828c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1833a40ed3dSBarry Smith PetscFunctionReturn(0); 184ff14e315SSatish Balay } 185ff14e315SSatish Balay 186637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 1878572280aSBarry Smith { 1888572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1898572280aSBarry Smith PetscErrorCode ierr; 1908572280aSBarry Smith 1918572280aSBarry Smith PetscFunctionBegin; 192616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1938572280aSBarry Smith ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 1948572280aSBarry Smith PetscFunctionReturn(0); 1958572280aSBarry Smith } 1968572280aSBarry Smith 1976947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 1986947451fSStefano Zampini { 1996947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2006947451fSStefano Zampini PetscErrorCode ierr; 2016947451fSStefano Zampini 2026947451fSStefano Zampini PetscFunctionBegin; 203616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 2046947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 2056947451fSStefano Zampini PetscFunctionReturn(0); 2066947451fSStefano Zampini } 2076947451fSStefano Zampini 208637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 209d3042a70SBarry Smith { 210d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211d3042a70SBarry Smith PetscErrorCode ierr; 212d3042a70SBarry Smith 213d3042a70SBarry Smith PetscFunctionBegin; 214616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 215616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 216d3042a70SBarry Smith ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 217d3042a70SBarry Smith PetscFunctionReturn(0); 218d3042a70SBarry Smith } 219d3042a70SBarry Smith 220d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 221d3042a70SBarry Smith { 222d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 223d3042a70SBarry Smith PetscErrorCode ierr; 224d3042a70SBarry Smith 225d3042a70SBarry Smith PetscFunctionBegin; 226616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 227616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 228d3042a70SBarry Smith ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 229d3042a70SBarry Smith PetscFunctionReturn(0); 230d3042a70SBarry Smith } 231d3042a70SBarry Smith 232d5ea218eSStefano Zampini static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 233d5ea218eSStefano Zampini { 234d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 235d5ea218eSStefano Zampini PetscErrorCode ierr; 236d5ea218eSStefano Zampini 237d5ea218eSStefano Zampini PetscFunctionBegin; 238616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 239616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 240d5ea218eSStefano Zampini ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 241d5ea218eSStefano Zampini PetscFunctionReturn(0); 242d5ea218eSStefano Zampini } 243d5ea218eSStefano Zampini 2447dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 245ca3fa75bSLois Curfman McInnes { 246ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 2476849ba73SBarry Smith PetscErrorCode ierr; 248637a0070SStefano Zampini PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 2495d0c19d7SBarry Smith const PetscInt *irow,*icol; 250637a0070SStefano Zampini const PetscScalar *v; 251637a0070SStefano Zampini PetscScalar *bv; 252ca3fa75bSLois Curfman McInnes Mat newmat; 2534aa3045dSJed Brown IS iscol_local; 25442a884f0SBarry Smith MPI_Comm comm_is,comm_mat; 255ca3fa75bSLois Curfman McInnes 256ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 25742a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 25842a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 25942a884f0SBarry Smith if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 26042a884f0SBarry Smith 2614aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 262ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2634aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 264b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 265b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2664aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 267ca3fa75bSLois Curfman McInnes 268ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2697eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2707eba5e9cSLois Curfman McInnes original matrix! */ 271ca3fa75bSLois Curfman McInnes 272ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 2737eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 274ca3fa75bSLois Curfman McInnes 275ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 276ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 277e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2787eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 279ca3fa75bSLois Curfman McInnes newmat = *B; 280ca3fa75bSLois Curfman McInnes } else { 281ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 282ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 2834aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 2847adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2850298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 286ca3fa75bSLois Curfman McInnes } 287ca3fa75bSLois Curfman McInnes 288ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 289ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 290637a0070SStefano Zampini ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 291637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 292637a0070SStefano Zampini ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 2934aa3045dSJed Brown for (i=0; i<Ncols; i++) { 294637a0070SStefano Zampini const PetscScalar *av = v + lda*icol[i]; 295ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2967eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 297ca3fa75bSLois Curfman McInnes } 298ca3fa75bSLois Curfman McInnes } 299637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 300637a0070SStefano Zampini ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 301ca3fa75bSLois Curfman McInnes 302ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 303ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 305ca3fa75bSLois Curfman McInnes 306ca3fa75bSLois Curfman McInnes /* Free work space */ 307ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 3085bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 30932bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 310ca3fa75bSLois Curfman McInnes *B = newmat; 311ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 312ca3fa75bSLois Curfman McInnes } 313ca3fa75bSLois Curfman McInnes 314637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 315ff14e315SSatish Balay { 31673a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 31773a71a0fSBarry Smith PetscErrorCode ierr; 31873a71a0fSBarry Smith 3193a40ed3dSBarry Smith PetscFunctionBegin; 3208c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 3213a40ed3dSBarry Smith PetscFunctionReturn(0); 322ff14e315SSatish Balay } 323ff14e315SSatish Balay 324637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 3258572280aSBarry Smith { 3268572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3278572280aSBarry Smith PetscErrorCode ierr; 3288572280aSBarry Smith 3298572280aSBarry Smith PetscFunctionBegin; 3308572280aSBarry Smith ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 3318572280aSBarry Smith PetscFunctionReturn(0); 3328572280aSBarry Smith } 3338572280aSBarry Smith 3346947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 3356947451fSStefano Zampini { 3366947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3376947451fSStefano Zampini PetscErrorCode ierr; 3386947451fSStefano Zampini 3396947451fSStefano Zampini PetscFunctionBegin; 3406947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 3416947451fSStefano Zampini PetscFunctionReturn(0); 3426947451fSStefano Zampini } 3436947451fSStefano Zampini 344dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 3458965ea79SLois Curfman McInnes { 34639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 347dfbe8321SBarry Smith PetscErrorCode ierr; 34813f74950SBarry Smith PetscInt nstash,reallocs; 3498965ea79SLois Curfman McInnes 3503a40ed3dSBarry Smith PetscFunctionBegin; 351910cf402Sprj- if (mdn->donotstash || mat->nooffprocentries) return(0); 3528965ea79SLois Curfman McInnes 353d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 3548798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 355ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 3563a40ed3dSBarry Smith PetscFunctionReturn(0); 3578965ea79SLois Curfman McInnes } 3588965ea79SLois Curfman McInnes 359dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 3608965ea79SLois Curfman McInnes { 36139ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 3626849ba73SBarry Smith PetscErrorCode ierr; 36313f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 36413f74950SBarry Smith PetscMPIInt n; 36587828ca2SBarry Smith PetscScalar *val; 3668965ea79SLois Curfman McInnes 3673a40ed3dSBarry Smith PetscFunctionBegin; 368910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 3698965ea79SLois Curfman McInnes /* wait on receives */ 3707ef1d9bdSSatish Balay while (1) { 3718798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 3727ef1d9bdSSatish Balay if (!flg) break; 3738965ea79SLois Curfman McInnes 3747ef1d9bdSSatish Balay for (i=0; i<n;) { 3757ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3762205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 3772205254eSKarl Rupp if (row[j] != rstart) break; 3782205254eSKarl Rupp } 3797ef1d9bdSSatish Balay if (j < n) ncols = j-i; 3807ef1d9bdSSatish Balay else ncols = n-i; 3817ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3824b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 3837ef1d9bdSSatish Balay i = j; 3848965ea79SLois Curfman McInnes } 3857ef1d9bdSSatish Balay } 3868798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 387910cf402Sprj- } 3888965ea79SLois Curfman McInnes 38939ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 39039ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 3918965ea79SLois Curfman McInnes 3926d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 39339ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 3948965ea79SLois Curfman McInnes } 3953a40ed3dSBarry Smith PetscFunctionReturn(0); 3968965ea79SLois Curfman McInnes } 3978965ea79SLois Curfman McInnes 398dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 3998965ea79SLois Curfman McInnes { 400dfbe8321SBarry Smith PetscErrorCode ierr; 40139ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 4023a40ed3dSBarry Smith 4033a40ed3dSBarry Smith PetscFunctionBegin; 4043a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 4053a40ed3dSBarry Smith PetscFunctionReturn(0); 4068965ea79SLois Curfman McInnes } 4078965ea79SLois Curfman McInnes 408637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 4098965ea79SLois Curfman McInnes { 41039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 4116849ba73SBarry Smith PetscErrorCode ierr; 412637a0070SStefano Zampini PetscInt i,len,*lrows; 413637a0070SStefano Zampini 414637a0070SStefano Zampini PetscFunctionBegin; 415637a0070SStefano Zampini /* get locally owned rows */ 416637a0070SStefano Zampini ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 417637a0070SStefano Zampini /* fix right hand side if needed */ 418637a0070SStefano Zampini if (x && b) { 41997b48c8fSBarry Smith const PetscScalar *xx; 42097b48c8fSBarry Smith PetscScalar *bb; 4218965ea79SLois Curfman McInnes 42297b48c8fSBarry Smith ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 423637a0070SStefano Zampini ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 424637a0070SStefano Zampini for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 42597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 426637a0070SStefano Zampini ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 42797b48c8fSBarry Smith } 428637a0070SStefano Zampini ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 429e2eb51b1SBarry Smith if (diag != 0.0) { 430637a0070SStefano Zampini Vec d; 431b9679d65SBarry Smith 432637a0070SStefano Zampini ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 433637a0070SStefano Zampini ierr = VecSet(d,diag);CHKERRQ(ierr); 434637a0070SStefano Zampini ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 435637a0070SStefano Zampini ierr = VecDestroy(&d);CHKERRQ(ierr); 436b9679d65SBarry Smith } 437606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4383a40ed3dSBarry Smith PetscFunctionReturn(0); 4398965ea79SLois Curfman McInnes } 4408965ea79SLois Curfman McInnes 441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 442cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 443cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 444cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 445cc2e6a90SBarry Smith 446dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4478965ea79SLois Curfman McInnes { 44839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449dfbe8321SBarry Smith PetscErrorCode ierr; 450637a0070SStefano Zampini const PetscScalar *ax; 451637a0070SStefano Zampini PetscScalar *ay; 452c456f294SBarry Smith 4533a40ed3dSBarry Smith PetscFunctionBegin; 454637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 455637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 456637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 457637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 458637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 459637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 460637a0070SStefano Zampini ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4613a40ed3dSBarry Smith PetscFunctionReturn(0); 4628965ea79SLois Curfman McInnes } 4638965ea79SLois Curfman McInnes 464dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4658965ea79SLois Curfman McInnes { 46639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467dfbe8321SBarry Smith PetscErrorCode ierr; 468637a0070SStefano Zampini const PetscScalar *ax; 469637a0070SStefano Zampini PetscScalar *ay; 470c456f294SBarry Smith 4713a40ed3dSBarry Smith PetscFunctionBegin; 472637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 473637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 474637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 475637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 476637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 477637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 478637a0070SStefano Zampini ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4793a40ed3dSBarry Smith PetscFunctionReturn(0); 4808965ea79SLois Curfman McInnes } 4818965ea79SLois Curfman McInnes 482dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 483096963f5SLois Curfman McInnes { 484096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 485dfbe8321SBarry Smith PetscErrorCode ierr; 486637a0070SStefano Zampini const PetscScalar *ax; 487637a0070SStefano Zampini PetscScalar *ay; 488096963f5SLois Curfman McInnes 4893a40ed3dSBarry Smith PetscFunctionBegin; 490637a0070SStefano Zampini ierr = VecSet(yy,0.0);CHKERRQ(ierr); 491637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 492637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 493637a0070SStefano Zampini ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 494637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 495637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 496637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 497637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 4983a40ed3dSBarry Smith PetscFunctionReturn(0); 499096963f5SLois Curfman McInnes } 500096963f5SLois Curfman McInnes 501dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 502096963f5SLois Curfman McInnes { 503096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504dfbe8321SBarry Smith PetscErrorCode ierr; 505637a0070SStefano Zampini const PetscScalar *ax; 506637a0070SStefano Zampini PetscScalar *ay; 507096963f5SLois Curfman McInnes 5083a40ed3dSBarry Smith PetscFunctionBegin; 5093501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 510637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 511637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 512637a0070SStefano Zampini ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 513637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 514637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 515637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 516637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 5173a40ed3dSBarry Smith PetscFunctionReturn(0); 518096963f5SLois Curfman McInnes } 519096963f5SLois Curfman McInnes 520dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 5218965ea79SLois Curfman McInnes { 52239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 523dfbe8321SBarry Smith PetscErrorCode ierr; 524637a0070SStefano Zampini PetscInt lda,len,i,n,m = A->rmap->n,radd; 52587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 526637a0070SStefano Zampini const PetscScalar *av; 527ed3cc1f0SBarry Smith 5283a40ed3dSBarry Smith PetscFunctionBegin; 5292dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 5301ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 531096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 532e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 533d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 534d0f46423SBarry Smith radd = A->rmap->rstart*m; 535637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 536637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 53744cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 538637a0070SStefano Zampini x[i] = av[radd + i*lda + i]; 539096963f5SLois Curfman McInnes } 540637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 5411ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5423a40ed3dSBarry Smith PetscFunctionReturn(0); 5438965ea79SLois Curfman McInnes } 5448965ea79SLois Curfman McInnes 545dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5468965ea79SLois Curfman McInnes { 5473501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 548dfbe8321SBarry Smith PetscErrorCode ierr; 549ed3cc1f0SBarry Smith 5503a40ed3dSBarry Smith PetscFunctionBegin; 551aa482453SBarry Smith #if defined(PETSC_USE_LOG) 552d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5538965ea79SLois Curfman McInnes #endif 5548798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555616b8fbbSStefano Zampini if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 556616b8fbbSStefano Zampini if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 5576bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5586bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 559637a0070SStefano Zampini ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 5606947451fSStefano Zampini ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 5615ea7661aSPierre Jolivet ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 56201b82886SBarry Smith 563bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 564dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5658baccfbdSHong Zhang 56649a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 567ad16ce7aSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 5688baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5698572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5708572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 5718572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 5726947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 5736947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 574d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 575d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 576d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 5778baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5788baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5798baccfbdSHong Zhang #endif 580*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 581*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr); 582*d24d4204SJose E. Roman #endif 583bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 5844222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5854222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 5866718818eSStefano Zampini #if defined (PETSC_HAVE_CUDA) 5876718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 5886718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 5896718818eSStefano Zampini #endif 59086aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 59186aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 5926947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 5936947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 5946947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 5956947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 5966947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 5976947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 5985ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 5995ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 6003a40ed3dSBarry Smith PetscFunctionReturn(0); 6018965ea79SLois Curfman McInnes } 60239ddd567SLois Curfman McInnes 60352c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 60452c5f739Sprj- 6059804daf3SBarry Smith #include <petscdraw.h> 6066849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 6078965ea79SLois Curfman McInnes { 60839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 609dfbe8321SBarry Smith PetscErrorCode ierr; 610616b8fbbSStefano Zampini PetscMPIInt rank; 61119fd82e9SBarry Smith PetscViewerType vtype; 612ace3abfcSBarry Smith PetscBool iascii,isdraw; 613b0a32e0cSBarry Smith PetscViewer sviewer; 614f3ef73ceSBarry Smith PetscViewerFormat format; 6158965ea79SLois Curfman McInnes 6163a40ed3dSBarry Smith PetscFunctionBegin; 617616b8fbbSStefano Zampini ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 618251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 619251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 62032077d6dSBarry Smith if (iascii) { 621b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 622b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 623456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6244e220ebcSLois Curfman McInnes MatInfo info; 625888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 6261575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 6277b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 628b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6291575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 630637a0070SStefano Zampini ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 6313a40ed3dSBarry Smith PetscFunctionReturn(0); 632fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 6333a40ed3dSBarry Smith PetscFunctionReturn(0); 6348965ea79SLois Curfman McInnes } 635f1af5d2fSBarry Smith } else if (isdraw) { 636b0a32e0cSBarry Smith PetscDraw draw; 637ace3abfcSBarry Smith PetscBool isnull; 638f1af5d2fSBarry Smith 639b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 640b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 641f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 642f1af5d2fSBarry Smith } 64377ed5343SBarry Smith 6447da1fb6eSBarry Smith { 6458965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6468965ea79SLois Curfman McInnes Mat A; 647d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 648ba8c8a56SBarry Smith PetscInt *cols; 649ba8c8a56SBarry Smith PetscScalar *vals; 6508965ea79SLois Curfman McInnes 651ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 6528965ea79SLois Curfman McInnes if (!rank) { 653f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 6543a40ed3dSBarry Smith } else { 655f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 6568965ea79SLois Curfman McInnes } 6577adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 658878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 6590298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 6603bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 6618965ea79SLois Curfman McInnes 66239ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 66339ddd567SLois Curfman McInnes but it's quick for now */ 66451022da4SBarry Smith A->insertmode = INSERT_VALUES; 6652205254eSKarl Rupp 6662205254eSKarl Rupp row = mat->rmap->rstart; 6672205254eSKarl Rupp m = mdn->A->rmap->n; 6688965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 669ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 670ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 671ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 67239ddd567SLois Curfman McInnes row++; 6738965ea79SLois Curfman McInnes } 6748965ea79SLois Curfman McInnes 6756d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6766d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6773f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 678b9b97703SBarry Smith if (!rank) { 6791a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 6807da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 6818965ea79SLois Curfman McInnes } 6823f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 683b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6846bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 6858965ea79SLois Curfman McInnes } 6863a40ed3dSBarry Smith PetscFunctionReturn(0); 6878965ea79SLois Curfman McInnes } 6888965ea79SLois Curfman McInnes 689dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 6908965ea79SLois Curfman McInnes { 691dfbe8321SBarry Smith PetscErrorCode ierr; 692ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 6938965ea79SLois Curfman McInnes 694433994e6SBarry Smith PetscFunctionBegin; 695251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 696251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 697251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 698251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 6990f5bd95cSBarry Smith 70032077d6dSBarry Smith if (iascii || issocket || isdraw) { 701f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 7020f5bd95cSBarry Smith } else if (isbinary) { 7038491ab44SLisandro Dalcin ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 70411aeaf0aSBarry Smith } 7053a40ed3dSBarry Smith PetscFunctionReturn(0); 7068965ea79SLois Curfman McInnes } 7078965ea79SLois Curfman McInnes 708dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 7098965ea79SLois Curfman McInnes { 7103501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 7113501a2bdSLois Curfman McInnes Mat mdn = mat->A; 712dfbe8321SBarry Smith PetscErrorCode ierr; 7133966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 7148965ea79SLois Curfman McInnes 7153a40ed3dSBarry Smith PetscFunctionBegin; 7164e220ebcSLois Curfman McInnes info->block_size = 1.0; 7172205254eSKarl Rupp 7184e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 7192205254eSKarl Rupp 7204e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 7214e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 7228965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 7234e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7244e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7254e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7264e220ebcSLois Curfman McInnes info->memory = isend[3]; 7274e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7288965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 7293966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7302205254eSKarl Rupp 7314e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7324e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7334e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7344e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7354e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7368965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 7373966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7382205254eSKarl Rupp 7394e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7404e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7414e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7424e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7434e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7448965ea79SLois Curfman McInnes } 7454e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7464e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7474e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7483a40ed3dSBarry Smith PetscFunctionReturn(0); 7498965ea79SLois Curfman McInnes } 7508965ea79SLois Curfman McInnes 751ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 7528965ea79SLois Curfman McInnes { 75339ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 754dfbe8321SBarry Smith PetscErrorCode ierr; 7558965ea79SLois Curfman McInnes 7563a40ed3dSBarry Smith PetscFunctionBegin; 75712c028f9SKris Buschelman switch (op) { 758512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 75912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 76012c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 76143674050SBarry Smith MatCheckPreallocated(A,1); 7624e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 76312c028f9SKris Buschelman break; 76412c028f9SKris Buschelman case MAT_ROW_ORIENTED: 76543674050SBarry Smith MatCheckPreallocated(A,1); 7664e0d8c25SBarry Smith a->roworiented = flg; 7674e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 76812c028f9SKris Buschelman break; 7694e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 77013fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 77112c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 772071fcb05SBarry Smith case MAT_SORTED_FULL: 773290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 77412c028f9SKris Buschelman break; 77512c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 7764e0d8c25SBarry Smith a->donotstash = flg; 77712c028f9SKris Buschelman break; 77877e54ba9SKris Buschelman case MAT_SYMMETRIC: 77977e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7809a4540c5SBarry Smith case MAT_HERMITIAN: 7819a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 782600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 7835d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 784290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 78577e54ba9SKris Buschelman break; 78612c028f9SKris Buschelman default: 787e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 7883a40ed3dSBarry Smith } 7893a40ed3dSBarry Smith PetscFunctionReturn(0); 7908965ea79SLois Curfman McInnes } 7918965ea79SLois Curfman McInnes 792dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7935b2fa520SLois Curfman McInnes { 7945b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 795637a0070SStefano Zampini const PetscScalar *l; 796637a0070SStefano Zampini PetscScalar x,*v,*vv,*r; 797dfbe8321SBarry Smith PetscErrorCode ierr; 798637a0070SStefano Zampini PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 7995b2fa520SLois Curfman McInnes 8005b2fa520SLois Curfman McInnes PetscFunctionBegin; 801637a0070SStefano Zampini ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 802637a0070SStefano Zampini ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 80372d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 8045b2fa520SLois Curfman McInnes if (ll) { 80572d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 806637a0070SStefano Zampini if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 807bca11509SBarry Smith ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 8085b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 8095b2fa520SLois Curfman McInnes x = l[i]; 810637a0070SStefano Zampini v = vv + i; 811637a0070SStefano Zampini for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 8125b2fa520SLois Curfman McInnes } 813bca11509SBarry Smith ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 814637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 8155b2fa520SLois Curfman McInnes } 8165b2fa520SLois Curfman McInnes if (rr) { 817637a0070SStefano Zampini const PetscScalar *ar; 818637a0070SStefano Zampini 819175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 820e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 821637a0070SStefano Zampini ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 822637a0070SStefano Zampini ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 823637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 824637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 825637a0070SStefano Zampini ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 8265b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 8275b2fa520SLois Curfman McInnes x = r[i]; 828637a0070SStefano Zampini v = vv + i*lda; 8292205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 8305b2fa520SLois Curfman McInnes } 831637a0070SStefano Zampini ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 832637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 8335b2fa520SLois Curfman McInnes } 834637a0070SStefano Zampini ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 8355b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8365b2fa520SLois Curfman McInnes } 8375b2fa520SLois Curfman McInnes 838dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 839096963f5SLois Curfman McInnes { 8403501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 841dfbe8321SBarry Smith PetscErrorCode ierr; 84213f74950SBarry Smith PetscInt i,j; 843616b8fbbSStefano Zampini PetscMPIInt size; 844329f5518SBarry Smith PetscReal sum = 0.0; 845637a0070SStefano Zampini const PetscScalar *av,*v; 8463501a2bdSLois Curfman McInnes 8473a40ed3dSBarry Smith PetscFunctionBegin; 848637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 849637a0070SStefano Zampini v = av; 850616b8fbbSStefano Zampini ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 851616b8fbbSStefano Zampini if (size == 1) { 852064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 8533501a2bdSLois Curfman McInnes } else { 8543501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 855d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 856329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 8573501a2bdSLois Curfman McInnes } 858b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8598f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 860dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 8613a40ed3dSBarry Smith } else if (type == NORM_1) { 862329f5518SBarry Smith PetscReal *tmp,*tmp2; 863580bdb30SBarry Smith ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 864064f8208SBarry Smith *nrm = 0.0; 865637a0070SStefano Zampini v = av; 866d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 867d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 86867e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8693501a2bdSLois Curfman McInnes } 8703501a2bdSLois Curfman McInnes } 871b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 872d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 873064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8743501a2bdSLois Curfman McInnes } 8758627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 876d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 8773a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 878329f5518SBarry Smith PetscReal ntemp; 8793501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 880b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 881ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 8823501a2bdSLois Curfman McInnes } 883637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 8843a40ed3dSBarry Smith PetscFunctionReturn(0); 8853501a2bdSLois Curfman McInnes } 8863501a2bdSLois Curfman McInnes 887fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 8883501a2bdSLois Curfman McInnes { 8893501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 8903501a2bdSLois Curfman McInnes Mat B; 891d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 8926849ba73SBarry Smith PetscErrorCode ierr; 893637a0070SStefano Zampini PetscInt j,i,lda; 89487828ca2SBarry Smith PetscScalar *v; 8953501a2bdSLois Curfman McInnes 8963a40ed3dSBarry Smith PetscFunctionBegin; 897cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 898ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 899d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 9007adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 9010298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 902637a0070SStefano Zampini } else B = *matout; 9033501a2bdSLois Curfman McInnes 904637a0070SStefano Zampini m = a->A->rmap->n; n = a->A->cmap->n; 905637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 906637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 907785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 9083501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 9091acff37aSSatish Balay for (j=0; j<n; j++) { 9103501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 911637a0070SStefano Zampini v += lda; 9123501a2bdSLois Curfman McInnes } 913637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 914606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 9156d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9166d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 917cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 9183501a2bdSLois Curfman McInnes *matout = B; 9193501a2bdSLois Curfman McInnes } else { 92028be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 9213501a2bdSLois Curfman McInnes } 9223a40ed3dSBarry Smith PetscFunctionReturn(0); 923096963f5SLois Curfman McInnes } 924096963f5SLois Curfman McInnes 9256849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 92652c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 9278965ea79SLois Curfman McInnes 9284994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 929273d9f13SBarry Smith { 930dfbe8321SBarry Smith PetscErrorCode ierr; 931273d9f13SBarry Smith 932273d9f13SBarry Smith PetscFunctionBegin; 93318992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 93418992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 93518992e5dSStefano Zampini if (!A->preallocated) { 936273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 93718992e5dSStefano Zampini } 938273d9f13SBarry Smith PetscFunctionReturn(0); 939273d9f13SBarry Smith } 940273d9f13SBarry Smith 941488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 942488007eeSBarry Smith { 943488007eeSBarry Smith PetscErrorCode ierr; 944488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 945488007eeSBarry Smith 946488007eeSBarry Smith PetscFunctionBegin; 947488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 948488007eeSBarry Smith PetscFunctionReturn(0); 949488007eeSBarry Smith } 950488007eeSBarry Smith 9517087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 952ba337c44SJed Brown { 953ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 954ba337c44SJed Brown PetscErrorCode ierr; 955ba337c44SJed Brown 956ba337c44SJed Brown PetscFunctionBegin; 957ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 958ba337c44SJed Brown PetscFunctionReturn(0); 959ba337c44SJed Brown } 960ba337c44SJed Brown 961ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 962ba337c44SJed Brown { 963ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 964ba337c44SJed Brown PetscErrorCode ierr; 965ba337c44SJed Brown 966ba337c44SJed Brown PetscFunctionBegin; 967ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 968ba337c44SJed Brown PetscFunctionReturn(0); 969ba337c44SJed Brown } 970ba337c44SJed Brown 971ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 972ba337c44SJed Brown { 973ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 974ba337c44SJed Brown PetscErrorCode ierr; 975ba337c44SJed Brown 976ba337c44SJed Brown PetscFunctionBegin; 977ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 978ba337c44SJed Brown PetscFunctionReturn(0); 979ba337c44SJed Brown } 980ba337c44SJed Brown 98149a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 98249a6ff4bSBarry Smith { 98349a6ff4bSBarry Smith PetscErrorCode ierr; 984637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*) A->data; 98549a6ff4bSBarry Smith 98649a6ff4bSBarry Smith PetscFunctionBegin; 987637a0070SStefano Zampini if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 988637a0070SStefano Zampini if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 989637a0070SStefano Zampini ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 99049a6ff4bSBarry Smith PetscFunctionReturn(0); 99149a6ff4bSBarry Smith } 99249a6ff4bSBarry Smith 99352c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 99452c5f739Sprj- 9950716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 9960716a85fSBarry Smith { 9970716a85fSBarry Smith PetscErrorCode ierr; 9980716a85fSBarry Smith PetscInt i,n; 9990716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 10000716a85fSBarry Smith PetscReal *work; 10010716a85fSBarry Smith 10020716a85fSBarry Smith PetscFunctionBegin; 10030298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1004785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 10050716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 10060716a85fSBarry Smith if (type == NORM_2) { 10070716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 10080716a85fSBarry Smith } 10090716a85fSBarry Smith if (type == NORM_INFINITY) { 1010b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 10110716a85fSBarry Smith } else { 1012b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 10130716a85fSBarry Smith } 10140716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 10150716a85fSBarry Smith if (type == NORM_2) { 10168f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 10170716a85fSBarry Smith } 10180716a85fSBarry Smith PetscFunctionReturn(0); 10190716a85fSBarry Smith } 10200716a85fSBarry Smith 1021637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 10226947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10236947451fSStefano Zampini { 10246947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10256947451fSStefano Zampini PetscErrorCode ierr; 10266947451fSStefano Zampini PetscInt lda; 10276947451fSStefano Zampini 10286947451fSStefano Zampini PetscFunctionBegin; 1029616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1030616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 10316947451fSStefano Zampini if (!a->cvec) { 10326947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1033616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 10346947451fSStefano Zampini } 10356947451fSStefano Zampini a->vecinuse = col + 1; 10366947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10376947451fSStefano Zampini ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10386947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10396947451fSStefano Zampini *v = a->cvec; 10406947451fSStefano Zampini PetscFunctionReturn(0); 10416947451fSStefano Zampini } 10426947451fSStefano Zampini 10436947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10446947451fSStefano Zampini { 10456947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10466947451fSStefano Zampini PetscErrorCode ierr; 10476947451fSStefano Zampini 10486947451fSStefano Zampini PetscFunctionBegin; 10495ea7661aSPierre Jolivet if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 10506947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 10516947451fSStefano Zampini a->vecinuse = 0; 10526947451fSStefano Zampini ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10536947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10546947451fSStefano Zampini *v = NULL; 10556947451fSStefano Zampini PetscFunctionReturn(0); 10566947451fSStefano Zampini } 10576947451fSStefano Zampini 10586947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10596947451fSStefano Zampini { 10606947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10616947451fSStefano Zampini PetscInt lda; 10626947451fSStefano Zampini PetscErrorCode ierr; 10636947451fSStefano Zampini 10646947451fSStefano Zampini PetscFunctionBegin; 1065616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1066616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 10676947451fSStefano Zampini if (!a->cvec) { 10686947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1069616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 10706947451fSStefano Zampini } 10716947451fSStefano Zampini a->vecinuse = col + 1; 10726947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10736947451fSStefano Zampini ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10746947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10756947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 10766947451fSStefano Zampini *v = a->cvec; 10776947451fSStefano Zampini PetscFunctionReturn(0); 10786947451fSStefano Zampini } 10796947451fSStefano Zampini 10806947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10816947451fSStefano Zampini { 10826947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10836947451fSStefano Zampini PetscErrorCode ierr; 10846947451fSStefano Zampini 10856947451fSStefano Zampini PetscFunctionBegin; 1086616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1087616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 10886947451fSStefano Zampini a->vecinuse = 0; 10896947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10906947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 10916947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10926947451fSStefano Zampini *v = NULL; 10936947451fSStefano Zampini PetscFunctionReturn(0); 10946947451fSStefano Zampini } 10956947451fSStefano Zampini 10966947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10976947451fSStefano Zampini { 10986947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10996947451fSStefano Zampini PetscErrorCode ierr; 11006947451fSStefano Zampini PetscInt lda; 11016947451fSStefano Zampini 11026947451fSStefano Zampini PetscFunctionBegin; 1103616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1104616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 11056947451fSStefano Zampini if (!a->cvec) { 11066947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1107616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr); 11086947451fSStefano Zampini } 11096947451fSStefano Zampini a->vecinuse = col + 1; 11106947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 11116947451fSStefano Zampini ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 11126947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 11136947451fSStefano Zampini *v = a->cvec; 11146947451fSStefano Zampini PetscFunctionReturn(0); 11156947451fSStefano Zampini } 11166947451fSStefano Zampini 11176947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 11186947451fSStefano Zampini { 11196947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 11206947451fSStefano Zampini PetscErrorCode ierr; 11216947451fSStefano Zampini 11226947451fSStefano Zampini PetscFunctionBegin; 1123616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1124616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 11256947451fSStefano Zampini a->vecinuse = 0; 11266947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 11276947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 11286947451fSStefano Zampini *v = NULL; 11296947451fSStefano Zampini PetscFunctionReturn(0); 11306947451fSStefano Zampini } 11316947451fSStefano Zampini 1132637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1133637a0070SStefano Zampini { 1134637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1135637a0070SStefano Zampini PetscErrorCode ierr; 1136637a0070SStefano Zampini 1137637a0070SStefano Zampini PetscFunctionBegin; 1138616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1139616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1140637a0070SStefano Zampini ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1141637a0070SStefano Zampini PetscFunctionReturn(0); 1142637a0070SStefano Zampini } 1143637a0070SStefano Zampini 1144637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1145637a0070SStefano Zampini { 1146637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1147637a0070SStefano Zampini PetscErrorCode ierr; 1148637a0070SStefano Zampini 1149637a0070SStefano Zampini PetscFunctionBegin; 1150616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1151616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1152637a0070SStefano Zampini ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1153637a0070SStefano Zampini PetscFunctionReturn(0); 1154637a0070SStefano Zampini } 1155637a0070SStefano Zampini 1156d5ea218eSStefano Zampini static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1157d5ea218eSStefano Zampini { 1158d5ea218eSStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1159d5ea218eSStefano Zampini PetscErrorCode ierr; 1160d5ea218eSStefano Zampini 1161d5ea218eSStefano Zampini PetscFunctionBegin; 1162616b8fbbSStefano Zampini if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1163616b8fbbSStefano Zampini if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1164d5ea218eSStefano Zampini ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1165d5ea218eSStefano Zampini PetscFunctionReturn(0); 1166d5ea218eSStefano Zampini } 1167d5ea218eSStefano Zampini 1168637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1169637a0070SStefano Zampini { 1170637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1171637a0070SStefano Zampini PetscErrorCode ierr; 1172637a0070SStefano Zampini 1173637a0070SStefano Zampini PetscFunctionBegin; 1174637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1175637a0070SStefano Zampini PetscFunctionReturn(0); 1176637a0070SStefano Zampini } 1177637a0070SStefano Zampini 1178637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1179637a0070SStefano Zampini { 1180637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1181637a0070SStefano Zampini PetscErrorCode ierr; 1182637a0070SStefano Zampini 1183637a0070SStefano Zampini PetscFunctionBegin; 1184637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1185637a0070SStefano Zampini PetscFunctionReturn(0); 1186637a0070SStefano Zampini } 1187637a0070SStefano Zampini 1188637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1189637a0070SStefano Zampini { 1190637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1191637a0070SStefano Zampini PetscErrorCode ierr; 1192637a0070SStefano Zampini 1193637a0070SStefano Zampini PetscFunctionBegin; 1194637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1195637a0070SStefano Zampini PetscFunctionReturn(0); 1196637a0070SStefano Zampini } 1197637a0070SStefano Zampini 1198637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1199637a0070SStefano Zampini { 1200637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1201637a0070SStefano Zampini PetscErrorCode ierr; 1202637a0070SStefano Zampini 1203637a0070SStefano Zampini PetscFunctionBegin; 1204637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1205637a0070SStefano Zampini PetscFunctionReturn(0); 1206637a0070SStefano Zampini } 1207637a0070SStefano Zampini 1208637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1209637a0070SStefano Zampini { 1210637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1211637a0070SStefano Zampini PetscErrorCode ierr; 1212637a0070SStefano Zampini 1213637a0070SStefano Zampini PetscFunctionBegin; 1214637a0070SStefano Zampini ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1215637a0070SStefano Zampini PetscFunctionReturn(0); 1216637a0070SStefano Zampini } 1217637a0070SStefano Zampini 1218637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1219637a0070SStefano Zampini { 1220637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1221637a0070SStefano Zampini PetscErrorCode ierr; 1222637a0070SStefano Zampini 1223637a0070SStefano Zampini PetscFunctionBegin; 1224637a0070SStefano Zampini ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1225637a0070SStefano Zampini PetscFunctionReturn(0); 1226637a0070SStefano Zampini } 1227637a0070SStefano Zampini 12286947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 12296947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 12306947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 12316947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 12326947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 12336947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 12345ea7661aSPierre Jolivet static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 12356947451fSStefano Zampini 1236637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1237637a0070SStefano Zampini { 1238637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1239637a0070SStefano Zampini PetscErrorCode ierr; 1240637a0070SStefano Zampini 1241637a0070SStefano Zampini PetscFunctionBegin; 1242616b8fbbSStefano Zampini if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1243616b8fbbSStefano Zampini if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1244637a0070SStefano Zampini if (d->A) { 1245637a0070SStefano Zampini ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1246637a0070SStefano Zampini } 1247637a0070SStefano Zampini mat->boundtocpu = bind; 12486947451fSStefano Zampini if (!bind) { 12496947451fSStefano Zampini PetscBool iscuda; 12506947451fSStefano Zampini 12516947451fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 12526947451fSStefano Zampini if (!iscuda) { 12536947451fSStefano Zampini ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1254616b8fbbSStefano Zampini } 1255616b8fbbSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1256616b8fbbSStefano Zampini if (!iscuda) { 12575ea7661aSPierre Jolivet ierr = MatDestroy(&d->cmat);CHKERRQ(ierr); 12586947451fSStefano Zampini } 12596947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12606947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12616947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12626947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12636947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12646947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12656947451fSStefano Zampini } else { 12666947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 12676947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 12686947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 12696947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 12706947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 12716947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1272616b8fbbSStefano Zampini } 1273616b8fbbSStefano Zampini if (d->cmat) { 1274616b8fbbSStefano Zampini ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr); 12756947451fSStefano Zampini } 1276637a0070SStefano Zampini PetscFunctionReturn(0); 1277637a0070SStefano Zampini } 1278637a0070SStefano Zampini 1279637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1280637a0070SStefano Zampini { 1281637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1282637a0070SStefano Zampini PetscErrorCode ierr; 1283637a0070SStefano Zampini PetscBool iscuda; 1284637a0070SStefano Zampini 1285637a0070SStefano Zampini PetscFunctionBegin; 1286d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1287637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1288637a0070SStefano Zampini if (!iscuda) PetscFunctionReturn(0); 1289637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1290637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1291637a0070SStefano Zampini if (!d->A) { 1292637a0070SStefano Zampini ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1293637a0070SStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1294637a0070SStefano Zampini ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1295637a0070SStefano Zampini } 1296637a0070SStefano Zampini ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1297637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1298637a0070SStefano Zampini A->preallocated = PETSC_TRUE; 1299637a0070SStefano Zampini PetscFunctionReturn(0); 1300637a0070SStefano Zampini } 1301637a0070SStefano Zampini #endif 1302637a0070SStefano Zampini 130373a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 130473a71a0fSBarry Smith { 130573a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 130673a71a0fSBarry Smith PetscErrorCode ierr; 130773a71a0fSBarry Smith 130873a71a0fSBarry Smith PetscFunctionBegin; 1309637a0070SStefano Zampini ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 131073a71a0fSBarry Smith PetscFunctionReturn(0); 131173a71a0fSBarry Smith } 131273a71a0fSBarry Smith 13133b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 13143b49f96aSBarry Smith { 13153b49f96aSBarry Smith PetscFunctionBegin; 13163b49f96aSBarry Smith *missing = PETSC_FALSE; 13173b49f96aSBarry Smith PetscFunctionReturn(0); 13183b49f96aSBarry Smith } 13193b49f96aSBarry Smith 13204222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1321cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 13226718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 13236718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 13246718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 13256718818eSStefano Zampini static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1326cc48ffa7SToby Isaac 13278965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 132809dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 132909dc0095SBarry Smith MatGetRow_MPIDense, 133009dc0095SBarry Smith MatRestoreRow_MPIDense, 133109dc0095SBarry Smith MatMult_MPIDense, 133297304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 13337c922b88SBarry Smith MatMultTranspose_MPIDense, 13347c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 13358965ea79SLois Curfman McInnes 0, 133609dc0095SBarry Smith 0, 133709dc0095SBarry Smith 0, 133897304618SKris Buschelman /* 10*/ 0, 133909dc0095SBarry Smith 0, 134009dc0095SBarry Smith 0, 134109dc0095SBarry Smith 0, 134209dc0095SBarry Smith MatTranspose_MPIDense, 134397304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 13446e4ee0c6SHong Zhang MatEqual_MPIDense, 134509dc0095SBarry Smith MatGetDiagonal_MPIDense, 13465b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 134709dc0095SBarry Smith MatNorm_MPIDense, 134897304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 134909dc0095SBarry Smith MatAssemblyEnd_MPIDense, 135009dc0095SBarry Smith MatSetOption_MPIDense, 135109dc0095SBarry Smith MatZeroEntries_MPIDense, 1352d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1353919b68f7SBarry Smith 0, 135401b82886SBarry Smith 0, 135501b82886SBarry Smith 0, 135601b82886SBarry Smith 0, 13574994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1358273d9f13SBarry Smith 0, 135909dc0095SBarry Smith 0, 1360c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 13618c778c55SBarry Smith 0, 1362d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 136309dc0095SBarry Smith 0, 136409dc0095SBarry Smith 0, 136509dc0095SBarry Smith 0, 136609dc0095SBarry Smith 0, 1367d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 13687dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 136909dc0095SBarry Smith 0, 137009dc0095SBarry Smith MatGetValues_MPIDense, 137109dc0095SBarry Smith 0, 1372d519adbfSMatthew Knepley /* 44*/ 0, 137309dc0095SBarry Smith MatScale_MPIDense, 13747d68702bSBarry Smith MatShift_Basic, 137509dc0095SBarry Smith 0, 137609dc0095SBarry Smith 0, 137773a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 137809dc0095SBarry Smith 0, 137909dc0095SBarry Smith 0, 138009dc0095SBarry Smith 0, 138109dc0095SBarry Smith 0, 1382d519adbfSMatthew Knepley /* 54*/ 0, 138309dc0095SBarry Smith 0, 138409dc0095SBarry Smith 0, 138509dc0095SBarry Smith 0, 138609dc0095SBarry Smith 0, 13877dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1388b9b97703SBarry Smith MatDestroy_MPIDense, 1389b9b97703SBarry Smith MatView_MPIDense, 1390357abbc8SBarry Smith 0, 139197304618SKris Buschelman 0, 1392d519adbfSMatthew Knepley /* 64*/ 0, 139397304618SKris Buschelman 0, 139497304618SKris Buschelman 0, 139597304618SKris Buschelman 0, 139697304618SKris Buschelman 0, 1397d519adbfSMatthew Knepley /* 69*/ 0, 139897304618SKris Buschelman 0, 139997304618SKris Buschelman 0, 140097304618SKris Buschelman 0, 140197304618SKris Buschelman 0, 1402d519adbfSMatthew Knepley /* 74*/ 0, 140397304618SKris Buschelman 0, 140497304618SKris Buschelman 0, 140597304618SKris Buschelman 0, 140697304618SKris Buschelman 0, 1407d519adbfSMatthew Knepley /* 79*/ 0, 140897304618SKris Buschelman 0, 140997304618SKris Buschelman 0, 141097304618SKris Buschelman 0, 14115bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1412865e5f61SKris Buschelman 0, 1413865e5f61SKris Buschelman 0, 1414865e5f61SKris Buschelman 0, 1415865e5f61SKris Buschelman 0, 1416865e5f61SKris Buschelman 0, 14174222ddf1SHong Zhang /* 89*/ 0, 14184222ddf1SHong Zhang 0, 14196718818eSStefano Zampini 0, 14202fbe02b9SBarry Smith 0, 1421ba337c44SJed Brown 0, 1422d519adbfSMatthew Knepley /* 94*/ 0, 14234222ddf1SHong Zhang 0, 14246718818eSStefano Zampini MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1425cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1426ba337c44SJed Brown 0, 14274222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1428ba337c44SJed Brown 0, 1429ba337c44SJed Brown 0, 1430ba337c44SJed Brown MatConjugate_MPIDense, 1431ba337c44SJed Brown 0, 1432ba337c44SJed Brown /*104*/ 0, 1433ba337c44SJed Brown MatRealPart_MPIDense, 1434ba337c44SJed Brown MatImaginaryPart_MPIDense, 143586d161a7SShri Abhyankar 0, 143686d161a7SShri Abhyankar 0, 143786d161a7SShri Abhyankar /*109*/ 0, 143886d161a7SShri Abhyankar 0, 143986d161a7SShri Abhyankar 0, 144049a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 14413b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 144286d161a7SShri Abhyankar /*114*/ 0, 144386d161a7SShri Abhyankar 0, 144486d161a7SShri Abhyankar 0, 144586d161a7SShri Abhyankar 0, 144686d161a7SShri Abhyankar 0, 144786d161a7SShri Abhyankar /*119*/ 0, 144886d161a7SShri Abhyankar 0, 144986d161a7SShri Abhyankar 0, 14500716a85fSBarry Smith 0, 14510716a85fSBarry Smith 0, 14520716a85fSBarry Smith /*124*/ 0, 14533964eb88SJed Brown MatGetColumnNorms_MPIDense, 14543964eb88SJed Brown 0, 14553964eb88SJed Brown 0, 14563964eb88SJed Brown 0, 14573964eb88SJed Brown /*129*/ 0, 14584222ddf1SHong Zhang 0, 14596718818eSStefano Zampini MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1460cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 14613964eb88SJed Brown 0, 14623964eb88SJed Brown /*134*/ 0, 14633964eb88SJed Brown 0, 14643964eb88SJed Brown 0, 14653964eb88SJed Brown 0, 14663964eb88SJed Brown 0, 14673964eb88SJed Brown /*139*/ 0, 1468f9426fe0SMark Adams 0, 146994e2cb23SJakub Kruzik 0, 147094e2cb23SJakub Kruzik 0, 147194e2cb23SJakub Kruzik 0, 14724222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 14734222ddf1SHong Zhang /*145*/ 0, 14744222ddf1SHong Zhang 0, 14754222ddf1SHong Zhang 0 1476ba337c44SJed Brown }; 14778965ea79SLois Curfman McInnes 14787087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1479a23d5eceSKris Buschelman { 1480637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1481637a0070SStefano Zampini PetscBool iscuda; 1482dfbe8321SBarry Smith PetscErrorCode ierr; 1483a23d5eceSKris Buschelman 1484a23d5eceSKris Buschelman PetscFunctionBegin; 1485616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 148634ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 148734ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1488637a0070SStefano Zampini if (!a->A) { 1489f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 14903bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1491637a0070SStefano Zampini ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1492637a0070SStefano Zampini } 1493637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1494637a0070SStefano Zampini ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1495637a0070SStefano Zampini ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1496637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 1497a23d5eceSKris Buschelman PetscFunctionReturn(0); 1498a23d5eceSKris Buschelman } 1499a23d5eceSKris Buschelman 150065b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1501cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 15028baccfbdSHong Zhang { 15038ea901baSHong Zhang Mat mat_elemental; 15048ea901baSHong Zhang PetscErrorCode ierr; 150532d7a744SHong Zhang PetscScalar *v; 150632d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 15078ea901baSHong Zhang 15088baccfbdSHong Zhang PetscFunctionBegin; 1509378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1510378336b6SHong Zhang mat_elemental = *newmat; 1511378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1512378336b6SHong Zhang } else { 1513378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1514378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1515378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1516378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 151732d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1518378336b6SHong Zhang } 1519378336b6SHong Zhang 152032d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 152132d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 152232d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 15238ea901baSHong Zhang 1524637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 152532d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 152632d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 15278ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15288ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 152932d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 153032d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 15318ea901baSHong Zhang 1532511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 153328be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 15348ea901baSHong Zhang } else { 15358ea901baSHong Zhang *newmat = mat_elemental; 15368ea901baSHong Zhang } 15378baccfbdSHong Zhang PetscFunctionReturn(0); 15388baccfbdSHong Zhang } 153965b80a83SHong Zhang #endif 15408baccfbdSHong Zhang 1541af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 154286aefd0dSHong Zhang { 154386aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 154486aefd0dSHong Zhang PetscErrorCode ierr; 154586aefd0dSHong Zhang 154686aefd0dSHong Zhang PetscFunctionBegin; 154786aefd0dSHong Zhang ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 154886aefd0dSHong Zhang PetscFunctionReturn(0); 154986aefd0dSHong Zhang } 155086aefd0dSHong Zhang 1551af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 155286aefd0dSHong Zhang { 155386aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 155486aefd0dSHong Zhang PetscErrorCode ierr; 155586aefd0dSHong Zhang 155686aefd0dSHong Zhang PetscFunctionBegin; 155786aefd0dSHong Zhang ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 155886aefd0dSHong Zhang PetscFunctionReturn(0); 155986aefd0dSHong Zhang } 156086aefd0dSHong Zhang 156194e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 156294e2cb23SJakub Kruzik { 156394e2cb23SJakub Kruzik PetscErrorCode ierr; 156494e2cb23SJakub Kruzik Mat_MPIDense *mat; 156594e2cb23SJakub Kruzik PetscInt m,nloc,N; 156694e2cb23SJakub Kruzik 156794e2cb23SJakub Kruzik PetscFunctionBegin; 156894e2cb23SJakub Kruzik ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 156994e2cb23SJakub Kruzik ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 157094e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 157194e2cb23SJakub Kruzik PetscInt sum; 157294e2cb23SJakub Kruzik 157394e2cb23SJakub Kruzik if (n == PETSC_DECIDE) { 157494e2cb23SJakub Kruzik ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 157594e2cb23SJakub Kruzik } 157694e2cb23SJakub Kruzik /* Check sum(n) = N */ 157794e2cb23SJakub Kruzik ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 157894e2cb23SJakub Kruzik if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 157994e2cb23SJakub Kruzik 158094e2cb23SJakub Kruzik ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 158194e2cb23SJakub Kruzik } 158294e2cb23SJakub Kruzik 158394e2cb23SJakub Kruzik /* numeric phase */ 158494e2cb23SJakub Kruzik mat = (Mat_MPIDense*)(*outmat)->data; 158594e2cb23SJakub Kruzik ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 158694e2cb23SJakub Kruzik ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 158794e2cb23SJakub Kruzik ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 158894e2cb23SJakub Kruzik PetscFunctionReturn(0); 158994e2cb23SJakub Kruzik } 159094e2cb23SJakub Kruzik 1591637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1592637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1593637a0070SStefano Zampini { 1594637a0070SStefano Zampini Mat B; 1595637a0070SStefano Zampini Mat_MPIDense *m; 1596637a0070SStefano Zampini PetscErrorCode ierr; 1597637a0070SStefano Zampini 1598637a0070SStefano Zampini PetscFunctionBegin; 1599637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1600637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1601637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1602637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1603637a0070SStefano Zampini } 1604637a0070SStefano Zampini 1605637a0070SStefano Zampini B = *newmat; 1606637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1607637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1608637a0070SStefano Zampini ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1609637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1610637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1611637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 16126718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr); 1613637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 16146718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 1615637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1616637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1617637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1618637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1619637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1620637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1621637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1622637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1623d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1624637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1625637a0070SStefano Zampini if (m->A) { 1626637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1627637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1628637a0070SStefano Zampini } 1629637a0070SStefano Zampini B->ops->bindtocpu = NULL; 1630637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_CPU; 1631637a0070SStefano Zampini PetscFunctionReturn(0); 1632637a0070SStefano Zampini } 1633637a0070SStefano Zampini 1634637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1635637a0070SStefano Zampini { 1636637a0070SStefano Zampini Mat B; 1637637a0070SStefano Zampini Mat_MPIDense *m; 1638637a0070SStefano Zampini PetscErrorCode ierr; 1639637a0070SStefano Zampini 1640637a0070SStefano Zampini PetscFunctionBegin; 1641637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1642637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1643637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1644637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1645637a0070SStefano Zampini } 1646637a0070SStefano Zampini 1647637a0070SStefano Zampini B = *newmat; 1648637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1649637a0070SStefano Zampini ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1650637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1651637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1652637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 16536718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1654637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 16556718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1656637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1657637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1658637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1659637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1660637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1661637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1662637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1663637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1664d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1665637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1666637a0070SStefano Zampini if (m->A) { 1667637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1668637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1669637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_BOTH; 1670637a0070SStefano Zampini } else { 1671637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1672637a0070SStefano Zampini } 1673637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1674637a0070SStefano Zampini 1675637a0070SStefano Zampini B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1676637a0070SStefano Zampini PetscFunctionReturn(0); 1677637a0070SStefano Zampini } 1678637a0070SStefano Zampini #endif 1679637a0070SStefano Zampini 16806947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 16816947451fSStefano Zampini { 16826947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16836947451fSStefano Zampini PetscErrorCode ierr; 16846947451fSStefano Zampini PetscInt lda; 16856947451fSStefano Zampini 16866947451fSStefano Zampini PetscFunctionBegin; 1687616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1688616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 16896947451fSStefano Zampini if (!a->cvec) { 16906947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 16916947451fSStefano Zampini } 16926947451fSStefano Zampini a->vecinuse = col + 1; 16936947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 16946947451fSStefano Zampini ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 16956947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 16966947451fSStefano Zampini *v = a->cvec; 16976947451fSStefano Zampini PetscFunctionReturn(0); 16986947451fSStefano Zampini } 16996947451fSStefano Zampini 17006947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 17016947451fSStefano Zampini { 17026947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17036947451fSStefano Zampini PetscErrorCode ierr; 17046947451fSStefano Zampini 17056947451fSStefano Zampini PetscFunctionBegin; 17065ea7661aSPierre Jolivet if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 17076947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 17086947451fSStefano Zampini a->vecinuse = 0; 17096947451fSStefano Zampini ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17106947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17116947451fSStefano Zampini *v = NULL; 17126947451fSStefano Zampini PetscFunctionReturn(0); 17136947451fSStefano Zampini } 17146947451fSStefano Zampini 17156947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 17166947451fSStefano Zampini { 17176947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17186947451fSStefano Zampini PetscErrorCode ierr; 17196947451fSStefano Zampini PetscInt lda; 17206947451fSStefano Zampini 17216947451fSStefano Zampini PetscFunctionBegin; 1722616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1723616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17246947451fSStefano Zampini if (!a->cvec) { 17256947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 17266947451fSStefano Zampini } 17276947451fSStefano Zampini a->vecinuse = col + 1; 17286947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 17296947451fSStefano Zampini ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 17306947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 17316947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 17326947451fSStefano Zampini *v = a->cvec; 17336947451fSStefano Zampini PetscFunctionReturn(0); 17346947451fSStefano Zampini } 17356947451fSStefano Zampini 17366947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 17376947451fSStefano Zampini { 17386947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17396947451fSStefano Zampini PetscErrorCode ierr; 17406947451fSStefano Zampini 17416947451fSStefano Zampini PetscFunctionBegin; 1742616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1743616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 17446947451fSStefano Zampini a->vecinuse = 0; 17456947451fSStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 17466947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 17476947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17486947451fSStefano Zampini *v = NULL; 17496947451fSStefano Zampini PetscFunctionReturn(0); 17506947451fSStefano Zampini } 17516947451fSStefano Zampini 17526947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17536947451fSStefano Zampini { 17546947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17556947451fSStefano Zampini PetscErrorCode ierr; 17566947451fSStefano Zampini PetscInt lda; 17576947451fSStefano Zampini 17586947451fSStefano Zampini PetscFunctionBegin; 1759616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1760616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17616947451fSStefano Zampini if (!a->cvec) { 17626947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 17636947451fSStefano Zampini } 17646947451fSStefano Zampini a->vecinuse = col + 1; 17656947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 17666947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17676947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 17686947451fSStefano Zampini *v = a->cvec; 17696947451fSStefano Zampini PetscFunctionReturn(0); 17706947451fSStefano Zampini } 17716947451fSStefano Zampini 17726947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17736947451fSStefano Zampini { 17746947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17756947451fSStefano Zampini PetscErrorCode ierr; 17766947451fSStefano Zampini 17776947451fSStefano Zampini PetscFunctionBegin; 1778616b8fbbSStefano Zampini if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1779616b8fbbSStefano Zampini if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 17806947451fSStefano Zampini a->vecinuse = 0; 17816947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17826947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17836947451fSStefano Zampini *v = NULL; 17846947451fSStefano Zampini PetscFunctionReturn(0); 17856947451fSStefano Zampini } 17866947451fSStefano Zampini 17875ea7661aSPierre Jolivet PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 17885ea7661aSPierre Jolivet { 17895ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1790616b8fbbSStefano Zampini Mat_MPIDense *c; 17915ea7661aSPierre Jolivet PetscErrorCode ierr; 1792616b8fbbSStefano Zampini MPI_Comm comm; 1793616b8fbbSStefano Zampini PetscBool setup = PETSC_FALSE; 17945ea7661aSPierre Jolivet 17955ea7661aSPierre Jolivet PetscFunctionBegin; 1796616b8fbbSStefano Zampini ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1797616b8fbbSStefano Zampini if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1798616b8fbbSStefano Zampini if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 17995ea7661aSPierre Jolivet if (!a->cmat) { 1800616b8fbbSStefano Zampini setup = PETSC_TRUE; 1801616b8fbbSStefano Zampini ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr); 1802616b8fbbSStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr); 1803616b8fbbSStefano Zampini ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1804616b8fbbSStefano Zampini ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr); 1805616b8fbbSStefano Zampini ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1806616b8fbbSStefano Zampini ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 1807616b8fbbSStefano Zampini } else if (cend-cbegin != a->cmat->cmap->N) { 1808616b8fbbSStefano Zampini setup = PETSC_TRUE; 1809616b8fbbSStefano Zampini ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr); 1810616b8fbbSStefano Zampini ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr); 1811616b8fbbSStefano Zampini ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr); 1812616b8fbbSStefano Zampini ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr); 18135ea7661aSPierre Jolivet } 1814616b8fbbSStefano Zampini c = (Mat_MPIDense*)a->cmat->data; 1815616b8fbbSStefano Zampini if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1816616b8fbbSStefano Zampini ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr); 1817616b8fbbSStefano Zampini if (setup) { /* do we really need this? */ 1818616b8fbbSStefano Zampini ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr); 1819616b8fbbSStefano Zampini } 1820616b8fbbSStefano Zampini a->cmat->preallocated = PETSC_TRUE; 1821616b8fbbSStefano Zampini a->cmat->assembled = PETSC_TRUE; 18225ea7661aSPierre Jolivet a->matinuse = cbegin + 1; 18235ea7661aSPierre Jolivet *v = a->cmat; 18245ea7661aSPierre Jolivet PetscFunctionReturn(0); 18255ea7661aSPierre Jolivet } 18265ea7661aSPierre Jolivet 18275ea7661aSPierre Jolivet PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 18285ea7661aSPierre Jolivet { 18295ea7661aSPierre Jolivet Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1830616b8fbbSStefano Zampini Mat_MPIDense *c; 18315ea7661aSPierre Jolivet PetscErrorCode ierr; 18325ea7661aSPierre Jolivet 18335ea7661aSPierre Jolivet PetscFunctionBegin; 1834616b8fbbSStefano Zampini if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1835616b8fbbSStefano Zampini if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1836616b8fbbSStefano Zampini if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 18375ea7661aSPierre Jolivet a->matinuse = 0; 1838616b8fbbSStefano Zampini c = (Mat_MPIDense*)a->cmat->data; 1839616b8fbbSStefano Zampini ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr); 18405ea7661aSPierre Jolivet *v = NULL; 18415ea7661aSPierre Jolivet PetscFunctionReturn(0); 18425ea7661aSPierre Jolivet } 18435ea7661aSPierre Jolivet 18448cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1845273d9f13SBarry Smith { 1846273d9f13SBarry Smith Mat_MPIDense *a; 1847dfbe8321SBarry Smith PetscErrorCode ierr; 1848273d9f13SBarry Smith 1849273d9f13SBarry Smith PetscFunctionBegin; 1850b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1851b0a32e0cSBarry Smith mat->data = (void*)a; 1852273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1853273d9f13SBarry Smith 1854273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1855273d9f13SBarry Smith 1856273d9f13SBarry Smith /* build cache for off array entries formed */ 1857273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 18582205254eSKarl Rupp 1859ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1860273d9f13SBarry Smith 1861273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1862273d9f13SBarry Smith a->lvec = 0; 1863273d9f13SBarry Smith a->Mvctx = 0; 1864273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1865273d9f13SBarry Smith 186649a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1867ad16ce7aSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr); 1868bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 18698572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 18708572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 18718572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 18726947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 18736947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1874d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1875d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1876d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 18776947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 18786947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 18796947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 18806947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 18816947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 18826947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 18835ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr); 18845ea7661aSPierre Jolivet ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr); 18858baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 18868baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 18878baccfbdSHong Zhang #endif 1888*d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK) 1889*d24d4204SJose E. Roman ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr); 1890*d24d4204SJose E. Roman #endif 1891637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1892637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1893637a0070SStefano Zampini #endif 1894bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 18954222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 18964222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 18976718818eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 18986718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 18996718818eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 19006718818eSStefano Zampini #endif 19018949adfdSHong Zhang 1902af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1903af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 190438aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1905273d9f13SBarry Smith PetscFunctionReturn(0); 1906273d9f13SBarry Smith } 1907273d9f13SBarry Smith 1908209238afSKris Buschelman /*MC 1909637a0070SStefano Zampini MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1910637a0070SStefano Zampini 1911637a0070SStefano Zampini Options Database Keys: 1912637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1913637a0070SStefano Zampini 1914637a0070SStefano Zampini Level: beginner 1915637a0070SStefano Zampini 1916637a0070SStefano Zampini .seealso: 1917637a0070SStefano Zampini 1918637a0070SStefano Zampini M*/ 1919637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1920637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1921637a0070SStefano Zampini { 1922637a0070SStefano Zampini PetscErrorCode ierr; 1923637a0070SStefano Zampini 1924637a0070SStefano Zampini PetscFunctionBegin; 1925637a0070SStefano Zampini ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1926637a0070SStefano Zampini ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1927637a0070SStefano Zampini PetscFunctionReturn(0); 1928637a0070SStefano Zampini } 1929637a0070SStefano Zampini #endif 1930637a0070SStefano Zampini 1931637a0070SStefano Zampini /*MC 1932002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1933209238afSKris Buschelman 1934209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1935209238afSKris Buschelman and MATMPIDENSE otherwise. 1936209238afSKris Buschelman 1937209238afSKris Buschelman Options Database Keys: 1938209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1939209238afSKris Buschelman 1940209238afSKris Buschelman Level: beginner 1941209238afSKris Buschelman 194201b82886SBarry Smith 19436947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 19446947451fSStefano Zampini M*/ 19456947451fSStefano Zampini 19466947451fSStefano Zampini /*MC 19476947451fSStefano Zampini MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 19486947451fSStefano Zampini 19496947451fSStefano Zampini This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 19506947451fSStefano Zampini and MATMPIDENSECUDA otherwise. 19516947451fSStefano Zampini 19526947451fSStefano Zampini Options Database Keys: 19536947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 19546947451fSStefano Zampini 19556947451fSStefano Zampini Level: beginner 19566947451fSStefano Zampini 19576947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1958209238afSKris Buschelman M*/ 1959209238afSKris Buschelman 1960273d9f13SBarry Smith /*@C 1961273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1962273d9f13SBarry Smith 1963273d9f13SBarry Smith Not collective 1964273d9f13SBarry Smith 1965273d9f13SBarry Smith Input Parameters: 19661c4f3114SJed Brown . B - the matrix 19670298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1968273d9f13SBarry Smith to control all matrix memory allocation. 1969273d9f13SBarry Smith 1970273d9f13SBarry Smith Notes: 1971273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1972273d9f13SBarry Smith storage by columns. 1973273d9f13SBarry Smith 1974273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1975273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 19760298fd71SBarry Smith set data=NULL. 1977273d9f13SBarry Smith 1978273d9f13SBarry Smith Level: intermediate 1979273d9f13SBarry Smith 1980273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1981273d9f13SBarry Smith @*/ 19821c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1983273d9f13SBarry Smith { 19844ac538c5SBarry Smith PetscErrorCode ierr; 1985273d9f13SBarry Smith 1986273d9f13SBarry Smith PetscFunctionBegin; 1987d5ea218eSStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 19881c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1989273d9f13SBarry Smith PetscFunctionReturn(0); 1990273d9f13SBarry Smith } 1991273d9f13SBarry Smith 1992d3042a70SBarry Smith /*@ 1993637a0070SStefano Zampini MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1994d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1995d3042a70SBarry Smith into a matrix 1996d3042a70SBarry Smith 1997d3042a70SBarry Smith Not Collective 1998d3042a70SBarry Smith 1999d3042a70SBarry Smith Input Parameters: 2000d3042a70SBarry Smith + mat - the matrix 2001d3042a70SBarry Smith - array - the array in column major order 2002d3042a70SBarry Smith 2003d3042a70SBarry Smith Notes: 2004d3042a70SBarry Smith You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2005d3042a70SBarry Smith freed when the matrix is destroyed. 2006d3042a70SBarry Smith 2007d3042a70SBarry Smith Level: developer 2008d3042a70SBarry Smith 2009d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2010d3042a70SBarry Smith 2011d3042a70SBarry Smith @*/ 2012637a0070SStefano Zampini PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2013d3042a70SBarry Smith { 2014d3042a70SBarry Smith PetscErrorCode ierr; 2015637a0070SStefano Zampini 2016d3042a70SBarry Smith PetscFunctionBegin; 2017d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2018d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2019d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2020637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2021637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 2022637a0070SStefano Zampini #endif 2023d3042a70SBarry Smith PetscFunctionReturn(0); 2024d3042a70SBarry Smith } 2025d3042a70SBarry Smith 2026d3042a70SBarry Smith /*@ 2027d3042a70SBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2028d3042a70SBarry Smith 2029d3042a70SBarry Smith Not Collective 2030d3042a70SBarry Smith 2031d3042a70SBarry Smith Input Parameters: 2032d3042a70SBarry Smith . mat - the matrix 2033d3042a70SBarry Smith 2034d3042a70SBarry Smith Notes: 2035d3042a70SBarry Smith You can only call this after a call to MatDensePlaceArray() 2036d3042a70SBarry Smith 2037d3042a70SBarry Smith Level: developer 2038d3042a70SBarry Smith 2039d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2040d3042a70SBarry Smith 2041d3042a70SBarry Smith @*/ 2042d3042a70SBarry Smith PetscErrorCode MatDenseResetArray(Mat mat) 2043d3042a70SBarry Smith { 2044d3042a70SBarry Smith PetscErrorCode ierr; 2045637a0070SStefano Zampini 2046d3042a70SBarry Smith PetscFunctionBegin; 2047d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2048d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2049d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2050d3042a70SBarry Smith PetscFunctionReturn(0); 2051d3042a70SBarry Smith } 2052d3042a70SBarry Smith 2053d5ea218eSStefano Zampini /*@ 2054d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2055d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2056d5ea218eSStefano Zampini into a matrix 2057d5ea218eSStefano Zampini 2058d5ea218eSStefano Zampini Not Collective 2059d5ea218eSStefano Zampini 2060d5ea218eSStefano Zampini Input Parameters: 2061d5ea218eSStefano Zampini + mat - the matrix 2062d5ea218eSStefano Zampini - array - the array in column major order 2063d5ea218eSStefano Zampini 2064d5ea218eSStefano Zampini Notes: 2065d5ea218eSStefano Zampini The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2066d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 2067d5ea218eSStefano Zampini 2068d5ea218eSStefano Zampini Level: developer 2069d5ea218eSStefano Zampini 2070d5ea218eSStefano Zampini .seealso: MatDenseGetArray(), VecReplaceArray() 2071d5ea218eSStefano Zampini @*/ 2072d5ea218eSStefano Zampini PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2073d5ea218eSStefano Zampini { 2074d5ea218eSStefano Zampini PetscErrorCode ierr; 2075d5ea218eSStefano Zampini 2076d5ea218eSStefano Zampini PetscFunctionBegin; 2077d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2078d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2079d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2080d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 2081d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 2082d5ea218eSStefano Zampini #endif 2083d5ea218eSStefano Zampini PetscFunctionReturn(0); 2084d5ea218eSStefano Zampini } 2085d5ea218eSStefano Zampini 2086637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 20878965ea79SLois Curfman McInnes /*@C 2088637a0070SStefano Zampini MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2089637a0070SStefano Zampini array provided by the user. This is useful to avoid copying an array 2090637a0070SStefano Zampini into a matrix 2091637a0070SStefano Zampini 2092637a0070SStefano Zampini Not Collective 2093637a0070SStefano Zampini 2094637a0070SStefano Zampini Input Parameters: 2095637a0070SStefano Zampini + mat - the matrix 2096637a0070SStefano Zampini - array - the array in column major order 2097637a0070SStefano Zampini 2098637a0070SStefano Zampini Notes: 2099637a0070SStefano Zampini You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2100637a0070SStefano Zampini freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2101637a0070SStefano Zampini 2102637a0070SStefano Zampini Level: developer 2103637a0070SStefano Zampini 2104637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2105637a0070SStefano Zampini @*/ 2106637a0070SStefano Zampini PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2107637a0070SStefano Zampini { 2108637a0070SStefano Zampini PetscErrorCode ierr; 2109637a0070SStefano Zampini 2110637a0070SStefano Zampini PetscFunctionBegin; 2111d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2112637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2113637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2114637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2115637a0070SStefano Zampini PetscFunctionReturn(0); 2116637a0070SStefano Zampini } 2117637a0070SStefano Zampini 2118637a0070SStefano Zampini /*@C 2119637a0070SStefano Zampini MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2120637a0070SStefano Zampini 2121637a0070SStefano Zampini Not Collective 2122637a0070SStefano Zampini 2123637a0070SStefano Zampini Input Parameters: 2124637a0070SStefano Zampini . mat - the matrix 2125637a0070SStefano Zampini 2126637a0070SStefano Zampini Notes: 2127637a0070SStefano Zampini You can only call this after a call to MatDenseCUDAPlaceArray() 2128637a0070SStefano Zampini 2129637a0070SStefano Zampini Level: developer 2130637a0070SStefano Zampini 2131637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2132637a0070SStefano Zampini 2133637a0070SStefano Zampini @*/ 2134637a0070SStefano Zampini PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2135637a0070SStefano Zampini { 2136637a0070SStefano Zampini PetscErrorCode ierr; 2137637a0070SStefano Zampini 2138637a0070SStefano Zampini PetscFunctionBegin; 2139d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2140637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2141637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2142637a0070SStefano Zampini PetscFunctionReturn(0); 2143637a0070SStefano Zampini } 2144637a0070SStefano Zampini 2145637a0070SStefano Zampini /*@C 2146d5ea218eSStefano Zampini MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2147d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2148d5ea218eSStefano Zampini into a matrix 2149d5ea218eSStefano Zampini 2150d5ea218eSStefano Zampini Not Collective 2151d5ea218eSStefano Zampini 2152d5ea218eSStefano Zampini Input Parameters: 2153d5ea218eSStefano Zampini + mat - the matrix 2154d5ea218eSStefano Zampini - array - the array in column major order 2155d5ea218eSStefano Zampini 2156d5ea218eSStefano Zampini Notes: 2157d5ea218eSStefano Zampini This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2158d5ea218eSStefano Zampini The memory passed in CANNOT be freed by the user. It will be freed 2159d5ea218eSStefano Zampini when the matrix is destroyed. The array should respect the matrix leading dimension. 2160d5ea218eSStefano Zampini 2161d5ea218eSStefano Zampini Level: developer 2162d5ea218eSStefano Zampini 2163d5ea218eSStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2164d5ea218eSStefano Zampini @*/ 2165d5ea218eSStefano Zampini PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2166d5ea218eSStefano Zampini { 2167d5ea218eSStefano Zampini PetscErrorCode ierr; 2168d5ea218eSStefano Zampini 2169d5ea218eSStefano Zampini PetscFunctionBegin; 2170d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2171d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2172d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2173d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2174d5ea218eSStefano Zampini PetscFunctionReturn(0); 2175d5ea218eSStefano Zampini } 2176d5ea218eSStefano Zampini 2177d5ea218eSStefano Zampini /*@C 2178637a0070SStefano Zampini MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2179637a0070SStefano Zampini 2180637a0070SStefano Zampini Not Collective 2181637a0070SStefano Zampini 2182637a0070SStefano Zampini Input Parameters: 2183637a0070SStefano Zampini . A - the matrix 2184637a0070SStefano Zampini 2185637a0070SStefano Zampini Output Parameters 2186637a0070SStefano Zampini . array - the GPU array in column major order 2187637a0070SStefano Zampini 2188637a0070SStefano Zampini Notes: 2189637a0070SStefano Zampini The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2190637a0070SStefano Zampini 2191637a0070SStefano Zampini Level: developer 2192637a0070SStefano Zampini 2193637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2194637a0070SStefano Zampini @*/ 2195637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2196637a0070SStefano Zampini { 2197637a0070SStefano Zampini PetscErrorCode ierr; 2198637a0070SStefano Zampini 2199637a0070SStefano Zampini PetscFunctionBegin; 2200d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2201637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2202637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2203637a0070SStefano Zampini PetscFunctionReturn(0); 2204637a0070SStefano Zampini } 2205637a0070SStefano Zampini 2206637a0070SStefano Zampini /*@C 2207637a0070SStefano Zampini MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2208637a0070SStefano Zampini 2209637a0070SStefano Zampini Not Collective 2210637a0070SStefano Zampini 2211637a0070SStefano Zampini Input Parameters: 2212637a0070SStefano Zampini + A - the matrix 2213637a0070SStefano Zampini - array - the GPU array in column major order 2214637a0070SStefano Zampini 2215637a0070SStefano Zampini Notes: 2216637a0070SStefano Zampini 2217637a0070SStefano Zampini Level: developer 2218637a0070SStefano Zampini 2219637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2220637a0070SStefano Zampini @*/ 2221637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2222637a0070SStefano Zampini { 2223637a0070SStefano Zampini PetscErrorCode ierr; 2224637a0070SStefano Zampini 2225637a0070SStefano Zampini PetscFunctionBegin; 2226d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2227637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2228637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2229637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2230637a0070SStefano Zampini PetscFunctionReturn(0); 2231637a0070SStefano Zampini } 2232637a0070SStefano Zampini 2233637a0070SStefano Zampini /*@C 2234637a0070SStefano Zampini MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2235637a0070SStefano Zampini 2236637a0070SStefano Zampini Not Collective 2237637a0070SStefano Zampini 2238637a0070SStefano Zampini Input Parameters: 2239637a0070SStefano Zampini . A - the matrix 2240637a0070SStefano Zampini 2241637a0070SStefano Zampini Output Parameters 2242637a0070SStefano Zampini . array - the GPU array in column major order 2243637a0070SStefano Zampini 2244637a0070SStefano Zampini Notes: 2245637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2246637a0070SStefano Zampini 2247637a0070SStefano Zampini Level: developer 2248637a0070SStefano Zampini 2249637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2250637a0070SStefano Zampini @*/ 2251637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2252637a0070SStefano Zampini { 2253637a0070SStefano Zampini PetscErrorCode ierr; 2254637a0070SStefano Zampini 2255637a0070SStefano Zampini PetscFunctionBegin; 2256d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2257637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2258637a0070SStefano Zampini PetscFunctionReturn(0); 2259637a0070SStefano Zampini } 2260637a0070SStefano Zampini 2261637a0070SStefano Zampini /*@C 2262637a0070SStefano Zampini MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2263637a0070SStefano Zampini 2264637a0070SStefano Zampini Not Collective 2265637a0070SStefano Zampini 2266637a0070SStefano Zampini Input Parameters: 2267637a0070SStefano Zampini + A - the matrix 2268637a0070SStefano Zampini - array - the GPU array in column major order 2269637a0070SStefano Zampini 2270637a0070SStefano Zampini Notes: 2271637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2272637a0070SStefano Zampini 2273637a0070SStefano Zampini Level: developer 2274637a0070SStefano Zampini 2275637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2276637a0070SStefano Zampini @*/ 2277637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2278637a0070SStefano Zampini { 2279637a0070SStefano Zampini PetscErrorCode ierr; 2280637a0070SStefano Zampini 2281637a0070SStefano Zampini PetscFunctionBegin; 2282637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2283637a0070SStefano Zampini PetscFunctionReturn(0); 2284637a0070SStefano Zampini } 2285637a0070SStefano Zampini 2286637a0070SStefano Zampini /*@C 2287637a0070SStefano Zampini MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2288637a0070SStefano Zampini 2289637a0070SStefano Zampini Not Collective 2290637a0070SStefano Zampini 2291637a0070SStefano Zampini Input Parameters: 2292637a0070SStefano Zampini . A - the matrix 2293637a0070SStefano Zampini 2294637a0070SStefano Zampini Output Parameters 2295637a0070SStefano Zampini . array - the GPU array in column major order 2296637a0070SStefano Zampini 2297637a0070SStefano Zampini Notes: 2298637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2299637a0070SStefano Zampini 2300637a0070SStefano Zampini Level: developer 2301637a0070SStefano Zampini 2302637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2303637a0070SStefano Zampini @*/ 2304637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2305637a0070SStefano Zampini { 2306637a0070SStefano Zampini PetscErrorCode ierr; 2307637a0070SStefano Zampini 2308637a0070SStefano Zampini PetscFunctionBegin; 2309d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2310637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2311637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2312637a0070SStefano Zampini PetscFunctionReturn(0); 2313637a0070SStefano Zampini } 2314637a0070SStefano Zampini 2315637a0070SStefano Zampini /*@C 2316637a0070SStefano Zampini MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2317637a0070SStefano Zampini 2318637a0070SStefano Zampini Not Collective 2319637a0070SStefano Zampini 2320637a0070SStefano Zampini Input Parameters: 2321637a0070SStefano Zampini + A - the matrix 2322637a0070SStefano Zampini - array - the GPU array in column major order 2323637a0070SStefano Zampini 2324637a0070SStefano Zampini Notes: 2325637a0070SStefano Zampini 2326637a0070SStefano Zampini Level: developer 2327637a0070SStefano Zampini 2328637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2329637a0070SStefano Zampini @*/ 2330637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2331637a0070SStefano Zampini { 2332637a0070SStefano Zampini PetscErrorCode ierr; 2333637a0070SStefano Zampini 2334637a0070SStefano Zampini PetscFunctionBegin; 2335d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2336637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2337637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2338637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2339637a0070SStefano Zampini PetscFunctionReturn(0); 2340637a0070SStefano Zampini } 2341637a0070SStefano Zampini #endif 2342637a0070SStefano Zampini 2343637a0070SStefano Zampini /*@C 2344637a0070SStefano Zampini MatCreateDense - Creates a matrix in dense format. 23458965ea79SLois Curfman McInnes 2346d083f849SBarry Smith Collective 2347db81eaa0SLois Curfman McInnes 23488965ea79SLois Curfman McInnes Input Parameters: 2349db81eaa0SLois Curfman McInnes + comm - MPI communicator 23508965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2351db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 23528965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2353db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 23546cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2355dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 23568965ea79SLois Curfman McInnes 23578965ea79SLois Curfman McInnes Output Parameter: 2358477f1c0bSLois Curfman McInnes . A - the matrix 23598965ea79SLois Curfman McInnes 2360b259b22eSLois Curfman McInnes Notes: 236139ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 236239ddd567SLois Curfman McInnes storage by columns. 23638965ea79SLois Curfman McInnes 236418f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 236518f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 23666cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 236718f449edSLois Curfman McInnes 23688965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 23698965ea79SLois Curfman McInnes (possibly both). 23708965ea79SLois Curfman McInnes 2371027ccd11SLois Curfman McInnes Level: intermediate 2372027ccd11SLois Curfman McInnes 237339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 23748965ea79SLois Curfman McInnes @*/ 237569b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 23768965ea79SLois Curfman McInnes { 23776849ba73SBarry Smith PetscErrorCode ierr; 237813f74950SBarry Smith PetscMPIInt size; 23798965ea79SLois Curfman McInnes 23803a40ed3dSBarry Smith PetscFunctionBegin; 2381f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 23828491ab44SLisandro Dalcin PetscValidLogicalCollectiveBool(*A,!!data,6); 2383f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2384273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2385273d9f13SBarry Smith if (size > 1) { 2386273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2387273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 23886cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 23896cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 23906cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 23916cfe35ddSJose E. Roman } 2392273d9f13SBarry Smith } else { 2393273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2394273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 23958c469469SLois Curfman McInnes } 23963a40ed3dSBarry Smith PetscFunctionReturn(0); 23978965ea79SLois Curfman McInnes } 23988965ea79SLois Curfman McInnes 2399637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2400637a0070SStefano Zampini /*@C 2401637a0070SStefano Zampini MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2402637a0070SStefano Zampini 2403637a0070SStefano Zampini Collective 2404637a0070SStefano Zampini 2405637a0070SStefano Zampini Input Parameters: 2406637a0070SStefano Zampini + comm - MPI communicator 2407637a0070SStefano Zampini . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2408637a0070SStefano Zampini . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2409637a0070SStefano Zampini . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2410637a0070SStefano Zampini . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2411637a0070SStefano Zampini - data - optional location of GPU matrix data. Set data=NULL for PETSc 2412637a0070SStefano Zampini to control matrix memory allocation. 2413637a0070SStefano Zampini 2414637a0070SStefano Zampini Output Parameter: 2415637a0070SStefano Zampini . A - the matrix 2416637a0070SStefano Zampini 2417637a0070SStefano Zampini Notes: 2418637a0070SStefano Zampini 2419637a0070SStefano Zampini Level: intermediate 2420637a0070SStefano Zampini 2421637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense() 2422637a0070SStefano Zampini @*/ 2423637a0070SStefano Zampini PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2424637a0070SStefano Zampini { 2425637a0070SStefano Zampini PetscErrorCode ierr; 2426637a0070SStefano Zampini PetscMPIInt size; 2427637a0070SStefano Zampini 2428637a0070SStefano Zampini PetscFunctionBegin; 2429637a0070SStefano Zampini ierr = MatCreate(comm,A);CHKERRQ(ierr); 2430637a0070SStefano Zampini PetscValidLogicalCollectiveBool(*A,!!data,6); 2431637a0070SStefano Zampini ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2432637a0070SStefano Zampini ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2433637a0070SStefano Zampini if (size > 1) { 2434637a0070SStefano Zampini ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2435637a0070SStefano Zampini ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2436637a0070SStefano Zampini if (data) { /* user provided data array, so no need to assemble */ 2437637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2438637a0070SStefano Zampini (*A)->assembled = PETSC_TRUE; 2439637a0070SStefano Zampini } 2440637a0070SStefano Zampini } else { 2441637a0070SStefano Zampini ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2442637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2443637a0070SStefano Zampini } 2444637a0070SStefano Zampini PetscFunctionReturn(0); 2445637a0070SStefano Zampini } 2446637a0070SStefano Zampini #endif 2447637a0070SStefano Zampini 24486849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 24498965ea79SLois Curfman McInnes { 24508965ea79SLois Curfman McInnes Mat mat; 24513501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2452dfbe8321SBarry Smith PetscErrorCode ierr; 24538965ea79SLois Curfman McInnes 24543a40ed3dSBarry Smith PetscFunctionBegin; 24558965ea79SLois Curfman McInnes *newmat = 0; 2456ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2457d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 24587adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2459834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 24605aa7edbeSHong Zhang 2461d5f3da31SBarry Smith mat->factortype = A->factortype; 2462c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2463273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 24648965ea79SLois Curfman McInnes 2465e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 24663782ba37SSatish Balay a->donotstash = oldmat->donotstash; 2467e04c1aa4SHong Zhang 24681e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 24691e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 24708965ea79SLois Curfman McInnes 24715609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 24723bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2473637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 247401b82886SBarry Smith 24758965ea79SLois Curfman McInnes *newmat = mat; 24763a40ed3dSBarry Smith PetscFunctionReturn(0); 24778965ea79SLois Curfman McInnes } 24788965ea79SLois Curfman McInnes 2479eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2480eb91f321SVaclav Hapla { 2481eb91f321SVaclav Hapla PetscErrorCode ierr; 248287d5ce66SSatish Balay PetscBool isbinary; 248387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 248487d5ce66SSatish Balay PetscBool ishdf5; 248587d5ce66SSatish Balay #endif 2486eb91f321SVaclav Hapla 2487eb91f321SVaclav Hapla PetscFunctionBegin; 2488eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2489eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2490eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 2491eb91f321SVaclav Hapla ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2492eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 249387d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 2494eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 249587d5ce66SSatish Balay #endif 2496eb91f321SVaclav Hapla if (isbinary) { 24978491ab44SLisandro Dalcin ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2498eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 249987d5ce66SSatish Balay } else if (ishdf5) { 2500eb91f321SVaclav Hapla ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2501eb91f321SVaclav Hapla #endif 250287d5ce66SSatish Balay } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2503eb91f321SVaclav Hapla PetscFunctionReturn(0); 2504eb91f321SVaclav Hapla } 2505eb91f321SVaclav Hapla 25066718818eSStefano Zampini static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 25076e4ee0c6SHong Zhang { 25086e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 25096e4ee0c6SHong Zhang Mat a,b; 2510ace3abfcSBarry Smith PetscBool flg; 25116e4ee0c6SHong Zhang PetscErrorCode ierr; 251290ace30eSBarry Smith 25136e4ee0c6SHong Zhang PetscFunctionBegin; 25146e4ee0c6SHong Zhang a = matA->A; 25156e4ee0c6SHong Zhang b = matB->A; 25166e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2517b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 25186e4ee0c6SHong Zhang PetscFunctionReturn(0); 25196e4ee0c6SHong Zhang } 252090ace30eSBarry Smith 25216718818eSStefano Zampini PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2522baa3c1c6SHong Zhang { 2523baa3c1c6SHong Zhang PetscErrorCode ierr; 25246718818eSStefano Zampini Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2525baa3c1c6SHong Zhang 2526baa3c1c6SHong Zhang PetscFunctionBegin; 2527637a0070SStefano Zampini ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2528637a0070SStefano Zampini ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2529baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 2530baa3c1c6SHong Zhang PetscFunctionReturn(0); 2531baa3c1c6SHong Zhang } 2532baa3c1c6SHong Zhang 25336718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2534cc48ffa7SToby Isaac { 2535cc48ffa7SToby Isaac PetscErrorCode ierr; 25366718818eSStefano Zampini Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2537cc48ffa7SToby Isaac 2538cc48ffa7SToby Isaac PetscFunctionBegin; 2539cc48ffa7SToby Isaac ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2540faa55883SToby Isaac ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2541cc48ffa7SToby Isaac ierr = PetscFree(abt);CHKERRQ(ierr); 2542cc48ffa7SToby Isaac PetscFunctionReturn(0); 2543cc48ffa7SToby Isaac } 2544cc48ffa7SToby Isaac 25456718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2546cb20be35SHong Zhang { 2547baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 25486718818eSStefano Zampini Mat_TransMatMultDense *atb; 2549cb20be35SHong Zhang PetscErrorCode ierr; 2550cb20be35SHong Zhang MPI_Comm comm; 25516718818eSStefano Zampini PetscMPIInt size,*recvcounts; 25526718818eSStefano Zampini PetscScalar *carray,*sendbuf; 2553637a0070SStefano Zampini const PetscScalar *atbarray; 2554d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2555e68c0b26SHong Zhang const PetscInt *ranges; 2556cb20be35SHong Zhang 2557cb20be35SHong Zhang PetscFunctionBegin; 25586718818eSStefano Zampini MatCheckProduct(C,3); 25596718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 25606718818eSStefano Zampini atb = (Mat_TransMatMultDense *)C->product->data; 25616718818eSStefano Zampini recvcounts = atb->recvcounts; 25626718818eSStefano Zampini sendbuf = atb->sendbuf; 25636718818eSStefano Zampini 2564cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2565cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2566e68c0b26SHong Zhang 2567c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 2568637a0070SStefano Zampini ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2569cb20be35SHong Zhang 2570cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2571cb20be35SHong Zhang 2572660d5466SHong Zhang /* arrange atbarray into sendbuf */ 2573637a0070SStefano Zampini ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2574637a0070SStefano Zampini for (proc=0, k=0; proc<size; proc++) { 2575baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 2576c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2577cb20be35SHong Zhang } 2578cb20be35SHong Zhang } 2579637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2580637a0070SStefano Zampini 2581c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 25820acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr); 25833462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 25840acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr); 25850acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25860acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2587cb20be35SHong Zhang PetscFunctionReturn(0); 2588cb20be35SHong Zhang } 2589cb20be35SHong Zhang 25906718818eSStefano Zampini static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2591cb20be35SHong Zhang { 2592cb20be35SHong Zhang PetscErrorCode ierr; 2593cb20be35SHong Zhang MPI_Comm comm; 2594baa3c1c6SHong Zhang PetscMPIInt size; 2595660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2596baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 25976718818eSStefano Zampini PetscBool cisdense; 25980acaeb71SStefano Zampini PetscInt i; 25990acaeb71SStefano Zampini const PetscInt *ranges; 2600cb20be35SHong Zhang 2601cb20be35SHong Zhang PetscFunctionBegin; 26026718818eSStefano Zampini MatCheckProduct(C,3); 26036718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2604baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2605cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2606cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2607cb20be35SHong Zhang } 2608cb20be35SHong Zhang 26094222ddf1SHong Zhang /* create matrix product C */ 26106718818eSStefano Zampini ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 26116718818eSStefano Zampini ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 26126718818eSStefano Zampini if (!cisdense) { 26136718818eSStefano Zampini ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 26146718818eSStefano Zampini } 261518992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 2616baa3c1c6SHong Zhang 26174222ddf1SHong Zhang /* create data structure for reuse C */ 2618baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2619baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 26204222ddf1SHong Zhang cM = C->rmap->N; 26216718818eSStefano Zampini ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 26220acaeb71SStefano Zampini ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 26230acaeb71SStefano Zampini for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2624baa3c1c6SHong Zhang 26256718818eSStefano Zampini C->product->data = atb; 26266718818eSStefano Zampini C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2627cb20be35SHong Zhang PetscFunctionReturn(0); 2628cb20be35SHong Zhang } 2629cb20be35SHong Zhang 26304222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2631cb20be35SHong Zhang { 2632cb20be35SHong Zhang PetscErrorCode ierr; 2633cc48ffa7SToby Isaac MPI_Comm comm; 2634cc48ffa7SToby Isaac PetscMPIInt i, size; 2635cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2636cc48ffa7SToby Isaac PetscMPIInt tag; 26374222ddf1SHong Zhang PetscInt alg; 2638cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 26394222ddf1SHong Zhang Mat_Product *product = C->product; 26404222ddf1SHong Zhang PetscBool flg; 2641cc48ffa7SToby Isaac 2642cc48ffa7SToby Isaac PetscFunctionBegin; 26436718818eSStefano Zampini MatCheckProduct(C,4); 26446718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 26454222ddf1SHong Zhang /* check local size of A and B */ 2646637a0070SStefano Zampini if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2647cc48ffa7SToby Isaac 26484222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2649637a0070SStefano Zampini alg = flg ? 0 : 1; 2650cc48ffa7SToby Isaac 26514222ddf1SHong Zhang /* setup matrix product C */ 26524222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 26534222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 265418992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 26554222ddf1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 26564222ddf1SHong Zhang 26574222ddf1SHong Zhang /* create data structure for reuse C */ 26584222ddf1SHong Zhang ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2659cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2660cc48ffa7SToby Isaac ierr = PetscNew(&abt);CHKERRQ(ierr); 2661cc48ffa7SToby Isaac abt->tag = tag; 2662faa55883SToby Isaac abt->alg = alg; 2663faa55883SToby Isaac switch (alg) { 26644222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2665cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2666cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2667cc48ffa7SToby Isaac ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2668faa55883SToby Isaac break; 26694222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2670faa55883SToby Isaac ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2671faa55883SToby Isaac ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2672faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2673faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2674faa55883SToby Isaac break; 2675faa55883SToby Isaac } 2676cc48ffa7SToby Isaac 26776718818eSStefano Zampini C->product->data = abt; 26786718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 26794222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2680cc48ffa7SToby Isaac PetscFunctionReturn(0); 2681cc48ffa7SToby Isaac } 2682cc48ffa7SToby Isaac 2683faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2684cc48ffa7SToby Isaac { 2685cc48ffa7SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 26866718818eSStefano Zampini Mat_MatTransMultDense *abt; 2687cc48ffa7SToby Isaac PetscErrorCode ierr; 2688cc48ffa7SToby Isaac MPI_Comm comm; 2689cc48ffa7SToby Isaac PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2690637a0070SStefano Zampini PetscScalar *sendbuf, *recvbuf=0, *cv; 2691cc48ffa7SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2692cc48ffa7SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2693637a0070SStefano Zampini const PetscScalar *av,*bv; 2694637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, blda, clda; 2695cc48ffa7SToby Isaac MPI_Request reqs[2]; 2696cc48ffa7SToby Isaac const PetscInt *ranges; 2697cc48ffa7SToby Isaac 2698cc48ffa7SToby Isaac PetscFunctionBegin; 26996718818eSStefano Zampini MatCheckProduct(C,3); 27006718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 27016718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 27026718818eSStefano Zampini ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2703cc48ffa7SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2704cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2705637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2706637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 27070acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2708637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2709637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2710637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2711637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2712637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2713637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2714cc48ffa7SToby Isaac ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2715cc48ffa7SToby Isaac bn = B->rmap->n; 2716637a0070SStefano Zampini if (blda == bn) { 2717637a0070SStefano Zampini sendbuf = (PetscScalar*)bv; 2718cc48ffa7SToby Isaac } else { 2719cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2720cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2721cc48ffa7SToby Isaac for (j = 0; j < bn; j++, k++) { 2722637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2723cc48ffa7SToby Isaac } 2724cc48ffa7SToby Isaac } 2725cc48ffa7SToby Isaac } 2726cc48ffa7SToby Isaac if (size > 1) { 2727cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2728cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2729cc48ffa7SToby Isaac } else { 2730cc48ffa7SToby Isaac sendto = recvfrom = 0; 2731cc48ffa7SToby Isaac } 2732cc48ffa7SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2733cc48ffa7SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2734cc48ffa7SToby Isaac recvisfrom = rank; 2735cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2736cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2737cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2738cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2739cc48ffa7SToby Isaac 2740cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2741cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2742cc48ffa7SToby Isaac sendsiz = cK * bn; 2743cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2744cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2745cc48ffa7SToby Isaac ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2746cc48ffa7SToby Isaac ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2747cc48ffa7SToby Isaac } 2748cc48ffa7SToby Isaac 2749cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 2750cc48ffa7SToby Isaac ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 275150ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2752cc48ffa7SToby Isaac 2753cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2754cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2755cc48ffa7SToby Isaac ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2756cc48ffa7SToby Isaac } 2757cc48ffa7SToby Isaac bn = nextbn; 2758cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2759cc48ffa7SToby Isaac sendbuf = recvbuf; 2760cc48ffa7SToby Isaac } 2761637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2762637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 27630acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 27640acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27650acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766cc48ffa7SToby Isaac PetscFunctionReturn(0); 2767cc48ffa7SToby Isaac } 2768cc48ffa7SToby Isaac 2769faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2770faa55883SToby Isaac { 2771faa55883SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 27726718818eSStefano Zampini Mat_MatTransMultDense *abt; 2773faa55883SToby Isaac PetscErrorCode ierr; 2774faa55883SToby Isaac MPI_Comm comm; 2775637a0070SStefano Zampini PetscMPIInt size; 2776637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2777637a0070SStefano Zampini const PetscScalar *av,*bv; 2778637a0070SStefano Zampini PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2779faa55883SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2780637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2781faa55883SToby Isaac 2782faa55883SToby Isaac PetscFunctionBegin; 27836718818eSStefano Zampini MatCheckProduct(C,3); 27846718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 27856718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 2786faa55883SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2787faa55883SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2788637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2789637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 27900acaeb71SStefano Zampini ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2791637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2792637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2793637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2794637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2795637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2796faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2797faa55883SToby Isaac bn = B->rmap->n; 2798faa55883SToby Isaac sendbuf = abt->buf[0]; 2799faa55883SToby Isaac recvbuf = abt->buf[1]; 2800faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2801faa55883SToby Isaac for (i = 0; i < cK; i++, k++) { 2802637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2803faa55883SToby Isaac } 2804faa55883SToby Isaac } 2805637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2806faa55883SToby Isaac ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2807faa55883SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2808faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2809faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 281050ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2811637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2812637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 28130acaeb71SStefano Zampini ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 28140acaeb71SStefano Zampini ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28150acaeb71SStefano Zampini ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2816faa55883SToby Isaac PetscFunctionReturn(0); 2817faa55883SToby Isaac } 2818faa55883SToby Isaac 2819faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2820faa55883SToby Isaac { 28216718818eSStefano Zampini Mat_MatTransMultDense *abt; 2822faa55883SToby Isaac PetscErrorCode ierr; 2823faa55883SToby Isaac 2824faa55883SToby Isaac PetscFunctionBegin; 28256718818eSStefano Zampini MatCheckProduct(C,3); 28266718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 28276718818eSStefano Zampini abt = (Mat_MatTransMultDense*)C->product->data; 2828faa55883SToby Isaac switch (abt->alg) { 2829faa55883SToby Isaac case 1: 2830faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2831faa55883SToby Isaac break; 2832faa55883SToby Isaac default: 2833faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2834faa55883SToby Isaac break; 2835faa55883SToby Isaac } 2836faa55883SToby Isaac PetscFunctionReturn(0); 2837faa55883SToby Isaac } 2838faa55883SToby Isaac 28396718818eSStefano Zampini PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2840320f2790SHong Zhang { 2841320f2790SHong Zhang PetscErrorCode ierr; 28426718818eSStefano Zampini Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2843320f2790SHong Zhang 2844320f2790SHong Zhang PetscFunctionBegin; 2845320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2846320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2847320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2848320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 2849320f2790SHong Zhang PetscFunctionReturn(0); 2850320f2790SHong Zhang } 2851320f2790SHong Zhang 28525242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 2853320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2854320f2790SHong Zhang { 2855320f2790SHong Zhang PetscErrorCode ierr; 28566718818eSStefano Zampini Mat_MatMultDense *ab; 2857320f2790SHong Zhang 2858320f2790SHong Zhang PetscFunctionBegin; 28596718818eSStefano Zampini MatCheckProduct(C,3); 28606718818eSStefano Zampini if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 28616718818eSStefano Zampini ab = (Mat_MatMultDense*)C->product->data; 2862de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2863de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 28644222ddf1SHong Zhang ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2865de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2866320f2790SHong Zhang PetscFunctionReturn(0); 2867320f2790SHong Zhang } 2868320f2790SHong Zhang 28696718818eSStefano Zampini static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2870320f2790SHong Zhang { 2871320f2790SHong Zhang PetscErrorCode ierr; 2872320f2790SHong Zhang Mat Ae,Be,Ce; 2873320f2790SHong Zhang Mat_MatMultDense *ab; 2874320f2790SHong Zhang 2875320f2790SHong Zhang PetscFunctionBegin; 28766718818eSStefano Zampini MatCheckProduct(C,4); 28776718818eSStefano Zampini if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 28784222ddf1SHong Zhang /* check local size of A and B */ 2879320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2880378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2881320f2790SHong Zhang } 2882320f2790SHong Zhang 28834222ddf1SHong Zhang /* create elemental matrices Ae and Be */ 28844222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 28854222ddf1SHong Zhang ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 28864222ddf1SHong Zhang ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 28874222ddf1SHong Zhang ierr = MatSetUp(Ae);CHKERRQ(ierr); 28884222ddf1SHong Zhang ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2889320f2790SHong Zhang 28904222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 28914222ddf1SHong Zhang ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 28924222ddf1SHong Zhang ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 28934222ddf1SHong Zhang ierr = MatSetUp(Be);CHKERRQ(ierr); 28944222ddf1SHong Zhang ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2895320f2790SHong Zhang 28964222ddf1SHong Zhang /* compute symbolic Ce = Ae*Be */ 28974222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 28984222ddf1SHong Zhang ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 28994222ddf1SHong Zhang 29004222ddf1SHong Zhang /* setup C */ 29014222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 29024222ddf1SHong Zhang ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 29034222ddf1SHong Zhang ierr = MatSetUp(C);CHKERRQ(ierr); 2904320f2790SHong Zhang 2905320f2790SHong Zhang /* create data structure for reuse Cdense */ 2906320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 2907320f2790SHong Zhang ab->Ae = Ae; 2908320f2790SHong Zhang ab->Be = Be; 2909320f2790SHong Zhang ab->Ce = Ce; 29106718818eSStefano Zampini 29116718818eSStefano Zampini C->product->data = ab; 29126718818eSStefano Zampini C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 29134222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2914320f2790SHong Zhang PetscFunctionReturn(0); 2915320f2790SHong Zhang } 29164222ddf1SHong Zhang #endif 29174222ddf1SHong Zhang /* ----------------------------------------------- */ 29184222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 29194222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2920320f2790SHong Zhang { 2921320f2790SHong Zhang PetscFunctionBegin; 29224222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 29234222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 2924320f2790SHong Zhang PetscFunctionReturn(0); 2925320f2790SHong Zhang } 29265242a7b1SHong Zhang #endif 292786aefd0dSHong Zhang 29284222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 29294222ddf1SHong Zhang { 29304222ddf1SHong Zhang Mat_Product *product = C->product; 29314222ddf1SHong Zhang Mat A = product->A,B=product->B; 29324222ddf1SHong Zhang 29334222ddf1SHong Zhang PetscFunctionBegin; 29344222ddf1SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 29354222ddf1SHong Zhang SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 29366718818eSStefano Zampini C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 29376718818eSStefano Zampini C->ops->productsymbolic = MatProductSymbolic_AtB; 29384222ddf1SHong Zhang PetscFunctionReturn(0); 29394222ddf1SHong Zhang } 29404222ddf1SHong Zhang 29414222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 29424222ddf1SHong Zhang { 29434222ddf1SHong Zhang PetscErrorCode ierr; 29444222ddf1SHong Zhang Mat_Product *product = C->product; 29454222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv","cyclic"}; 29464222ddf1SHong Zhang PetscInt alg,nalg = 2; 29474222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 29484222ddf1SHong Zhang 29494222ddf1SHong Zhang PetscFunctionBegin; 29504222ddf1SHong Zhang /* Set default algorithm */ 29514222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 29524222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 29534222ddf1SHong Zhang if (flg) { 29544222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29554222ddf1SHong Zhang } 29564222ddf1SHong Zhang 29574222ddf1SHong Zhang /* Get runtime option */ 29584222ddf1SHong Zhang if (product->api_user) { 29594222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 29604222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 29614222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 29624222ddf1SHong Zhang } else { 29634222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 29644222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 29654222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 29664222ddf1SHong Zhang } 29674222ddf1SHong Zhang if (flg) { 29684222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 29694222ddf1SHong Zhang } 29704222ddf1SHong Zhang 29714222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 29724222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 29734222ddf1SHong Zhang PetscFunctionReturn(0); 29744222ddf1SHong Zhang } 29754222ddf1SHong Zhang 29764222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 29774222ddf1SHong Zhang { 29784222ddf1SHong Zhang PetscErrorCode ierr; 29794222ddf1SHong Zhang Mat_Product *product = C->product; 29804222ddf1SHong Zhang 29814222ddf1SHong Zhang PetscFunctionBegin; 29824222ddf1SHong Zhang switch (product->type) { 29834222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 29844222ddf1SHong Zhang case MATPRODUCT_AB: 29854222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 29864222ddf1SHong Zhang break; 29874222ddf1SHong Zhang #endif 29884222ddf1SHong Zhang case MATPRODUCT_AtB: 29894222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 29904222ddf1SHong Zhang break; 29914222ddf1SHong Zhang case MATPRODUCT_ABt: 29924222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 29934222ddf1SHong Zhang break; 29946718818eSStefano Zampini default: 29956718818eSStefano Zampini break; 29964222ddf1SHong Zhang } 29974222ddf1SHong Zhang PetscFunctionReturn(0); 29984222ddf1SHong Zhang } 2999