1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 8baa3c1c6SHong Zhang #include <petscblaslapack.h> 98965ea79SLois Curfman McInnes 10ab92ecdeSBarry Smith /*@ 11ab92ecdeSBarry Smith 12ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 16ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 23ab92ecdeSBarry Smith @*/ 24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25ab92ecdeSBarry Smith { 26ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27ab92ecdeSBarry Smith PetscErrorCode ierr; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 322205254eSKarl Rupp if (flg) *B = mat->A; 332205254eSKarl Rupp else *B = A; 34ab92ecdeSBarry Smith PetscFunctionReturn(0); 35ab92ecdeSBarry Smith } 36ab92ecdeSBarry Smith 37ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 38ba8c8a56SBarry Smith { 39ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 40ba8c8a56SBarry Smith PetscErrorCode ierr; 41d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 42ba8c8a56SBarry Smith 43ba8c8a56SBarry Smith PetscFunctionBegin; 44e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 45ba8c8a56SBarry Smith lrow = row - rstart; 46ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 47ba8c8a56SBarry Smith PetscFunctionReturn(0); 48ba8c8a56SBarry Smith } 49ba8c8a56SBarry Smith 50637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 51ba8c8a56SBarry Smith { 52637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 53ba8c8a56SBarry Smith PetscErrorCode ierr; 54637a0070SStefano Zampini PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 55ba8c8a56SBarry Smith 56ba8c8a56SBarry Smith PetscFunctionBegin; 57637a0070SStefano Zampini if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 58637a0070SStefano Zampini lrow = row - rstart; 59637a0070SStefano Zampini ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 60ba8c8a56SBarry Smith PetscFunctionReturn(0); 61ba8c8a56SBarry Smith } 62ba8c8a56SBarry Smith 6311bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 640de54da6SSatish Balay { 650de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 666849ba73SBarry Smith PetscErrorCode ierr; 67d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 6887828ca2SBarry Smith PetscScalar *array; 690de54da6SSatish Balay MPI_Comm comm; 70637a0070SStefano Zampini PetscBool cong; 7111bd1e4dSLisandro Dalcin Mat B; 720de54da6SSatish Balay 730de54da6SSatish Balay PetscFunctionBegin; 74637a0070SStefano Zampini ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 75637a0070SStefano Zampini if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 7611bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 7711bd1e4dSLisandro Dalcin if (!B) { 780de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 7911bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 8011bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 8111bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 828c778c55SBarry Smith ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 8311bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 848c778c55SBarry Smith ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8611bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8711bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 8811bd1e4dSLisandro Dalcin *a = B; 8911bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 902205254eSKarl Rupp } else *a = B; 910de54da6SSatish Balay PetscFunctionReturn(0); 920de54da6SSatish Balay } 930de54da6SSatish Balay 9413f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 958965ea79SLois Curfman McInnes { 9639b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 97dfbe8321SBarry Smith PetscErrorCode ierr; 98d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 99ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1008965ea79SLois Curfman McInnes 1013a40ed3dSBarry Smith PetscFunctionBegin; 1028965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1035ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 104e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1058965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1068965ea79SLois Curfman McInnes row = idxm[i] - rstart; 10739b7565bSBarry Smith if (roworiented) { 10839b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1093a40ed3dSBarry Smith } else { 1108965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1115ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 112e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11339b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11439b7565bSBarry Smith } 1158965ea79SLois Curfman McInnes } 1162205254eSKarl Rupp } else if (!A->donotstash) { 1175080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 11839b7565bSBarry Smith if (roworiented) { 119b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 120d36fbae8SSatish Balay } else { 121b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 12239b7565bSBarry Smith } 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes } 1253a40ed3dSBarry Smith PetscFunctionReturn(0); 126b49de8d1SLois Curfman McInnes } 127b49de8d1SLois Curfman McInnes 12813f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 129b49de8d1SLois Curfman McInnes { 130b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 131dfbe8321SBarry Smith PetscErrorCode ierr; 132d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 133b49de8d1SLois Curfman McInnes 1343a40ed3dSBarry Smith PetscFunctionBegin; 135b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 136e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 137e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 138b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 139b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 140b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 141e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 142e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 144b49de8d1SLois Curfman McInnes } 145e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1468965ea79SLois Curfman McInnes } 1473a40ed3dSBarry Smith PetscFunctionReturn(0); 1488965ea79SLois Curfman McInnes } 1498965ea79SLois Curfman McInnes 15049a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 15149a6ff4bSBarry Smith { 15249a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 15349a6ff4bSBarry Smith PetscErrorCode ierr; 15449a6ff4bSBarry Smith 15549a6ff4bSBarry Smith PetscFunctionBegin; 15649a6ff4bSBarry Smith ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 15749a6ff4bSBarry Smith PetscFunctionReturn(0); 15849a6ff4bSBarry Smith } 15949a6ff4bSBarry Smith 160637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 161ff14e315SSatish Balay { 162ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 163dfbe8321SBarry Smith PetscErrorCode ierr; 164ff14e315SSatish Balay 1653a40ed3dSBarry Smith PetscFunctionBegin; 1668c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1673a40ed3dSBarry Smith PetscFunctionReturn(0); 168ff14e315SSatish Balay } 169ff14e315SSatish Balay 170637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 1718572280aSBarry Smith { 1728572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1738572280aSBarry Smith PetscErrorCode ierr; 1748572280aSBarry Smith 1758572280aSBarry Smith PetscFunctionBegin; 1768572280aSBarry Smith ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 1778572280aSBarry Smith PetscFunctionReturn(0); 1788572280aSBarry Smith } 1798572280aSBarry Smith 180*6947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 181*6947451fSStefano Zampini { 182*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 183*6947451fSStefano Zampini PetscErrorCode ierr; 184*6947451fSStefano Zampini 185*6947451fSStefano Zampini PetscFunctionBegin; 186*6947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 187*6947451fSStefano Zampini PetscFunctionReturn(0); 188*6947451fSStefano Zampini } 189*6947451fSStefano Zampini 190637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 191d3042a70SBarry Smith { 192d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 193d3042a70SBarry Smith PetscErrorCode ierr; 194d3042a70SBarry Smith 195d3042a70SBarry Smith PetscFunctionBegin; 196*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 197d3042a70SBarry Smith ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 198d3042a70SBarry Smith PetscFunctionReturn(0); 199d3042a70SBarry Smith } 200d3042a70SBarry Smith 201d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 202d3042a70SBarry Smith { 203d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 204d3042a70SBarry Smith PetscErrorCode ierr; 205d3042a70SBarry Smith 206d3042a70SBarry Smith PetscFunctionBegin; 207*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 208d3042a70SBarry Smith ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 209d3042a70SBarry Smith PetscFunctionReturn(0); 210d3042a70SBarry Smith } 211d3042a70SBarry Smith 2127dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 213ca3fa75bSLois Curfman McInnes { 214ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 2156849ba73SBarry Smith PetscErrorCode ierr; 216637a0070SStefano Zampini PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 2175d0c19d7SBarry Smith const PetscInt *irow,*icol; 218637a0070SStefano Zampini const PetscScalar *v; 219637a0070SStefano Zampini PetscScalar *bv; 220ca3fa75bSLois Curfman McInnes Mat newmat; 2214aa3045dSJed Brown IS iscol_local; 22242a884f0SBarry Smith MPI_Comm comm_is,comm_mat; 223ca3fa75bSLois Curfman McInnes 224ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 22542a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 22642a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 22742a884f0SBarry Smith if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 22842a884f0SBarry Smith 2294aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 230ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2314aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 232b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 233b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2344aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 235ca3fa75bSLois Curfman McInnes 236ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2377eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2387eba5e9cSLois Curfman McInnes original matrix! */ 239ca3fa75bSLois Curfman McInnes 240ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 2417eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 242ca3fa75bSLois Curfman McInnes 243ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 244ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 245e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2467eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 247ca3fa75bSLois Curfman McInnes newmat = *B; 248ca3fa75bSLois Curfman McInnes } else { 249ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 250ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 2514aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 2527adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2530298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 254ca3fa75bSLois Curfman McInnes } 255ca3fa75bSLois Curfman McInnes 256ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 257ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 258637a0070SStefano Zampini ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 259637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 260637a0070SStefano Zampini ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 2614aa3045dSJed Brown for (i=0; i<Ncols; i++) { 262637a0070SStefano Zampini const PetscScalar *av = v + lda*icol[i]; 263ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2647eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 265ca3fa75bSLois Curfman McInnes } 266ca3fa75bSLois Curfman McInnes } 267637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 268637a0070SStefano Zampini ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 269ca3fa75bSLois Curfman McInnes 270ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 271ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 272ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 273ca3fa75bSLois Curfman McInnes 274ca3fa75bSLois Curfman McInnes /* Free work space */ 275ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 2765bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 27732bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 278ca3fa75bSLois Curfman McInnes *B = newmat; 279ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 280ca3fa75bSLois Curfman McInnes } 281ca3fa75bSLois Curfman McInnes 282637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 283ff14e315SSatish Balay { 28473a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 28573a71a0fSBarry Smith PetscErrorCode ierr; 28673a71a0fSBarry Smith 2873a40ed3dSBarry Smith PetscFunctionBegin; 2888c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 2893a40ed3dSBarry Smith PetscFunctionReturn(0); 290ff14e315SSatish Balay } 291ff14e315SSatish Balay 292637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 2938572280aSBarry Smith { 2948572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2958572280aSBarry Smith PetscErrorCode ierr; 2968572280aSBarry Smith 2978572280aSBarry Smith PetscFunctionBegin; 2988572280aSBarry Smith ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 2998572280aSBarry Smith PetscFunctionReturn(0); 3008572280aSBarry Smith } 3018572280aSBarry Smith 302*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 303*6947451fSStefano Zampini { 304*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 305*6947451fSStefano Zampini PetscErrorCode ierr; 306*6947451fSStefano Zampini 307*6947451fSStefano Zampini PetscFunctionBegin; 308*6947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 309*6947451fSStefano Zampini PetscFunctionReturn(0); 310*6947451fSStefano Zampini } 311*6947451fSStefano Zampini 312dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 3138965ea79SLois Curfman McInnes { 31439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 315dfbe8321SBarry Smith PetscErrorCode ierr; 31613f74950SBarry Smith PetscInt nstash,reallocs; 3178965ea79SLois Curfman McInnes 3183a40ed3dSBarry Smith PetscFunctionBegin; 319910cf402Sprj- if (mdn->donotstash || mat->nooffprocentries) return(0); 3208965ea79SLois Curfman McInnes 321d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 3228798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 323ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 3243a40ed3dSBarry Smith PetscFunctionReturn(0); 3258965ea79SLois Curfman McInnes } 3268965ea79SLois Curfman McInnes 327dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 3288965ea79SLois Curfman McInnes { 32939ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 3306849ba73SBarry Smith PetscErrorCode ierr; 33113f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 33213f74950SBarry Smith PetscMPIInt n; 33387828ca2SBarry Smith PetscScalar *val; 3348965ea79SLois Curfman McInnes 3353a40ed3dSBarry Smith PetscFunctionBegin; 336910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 3378965ea79SLois Curfman McInnes /* wait on receives */ 3387ef1d9bdSSatish Balay while (1) { 3398798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 3407ef1d9bdSSatish Balay if (!flg) break; 3418965ea79SLois Curfman McInnes 3427ef1d9bdSSatish Balay for (i=0; i<n;) { 3437ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3442205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 3452205254eSKarl Rupp if (row[j] != rstart) break; 3462205254eSKarl Rupp } 3477ef1d9bdSSatish Balay if (j < n) ncols = j-i; 3487ef1d9bdSSatish Balay else ncols = n-i; 3497ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3504b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 3517ef1d9bdSSatish Balay i = j; 3528965ea79SLois Curfman McInnes } 3537ef1d9bdSSatish Balay } 3548798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 355910cf402Sprj- } 3568965ea79SLois Curfman McInnes 35739ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 35839ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 3598965ea79SLois Curfman McInnes 3606d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 36139ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 3628965ea79SLois Curfman McInnes } 3633a40ed3dSBarry Smith PetscFunctionReturn(0); 3648965ea79SLois Curfman McInnes } 3658965ea79SLois Curfman McInnes 366dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 3678965ea79SLois Curfman McInnes { 368dfbe8321SBarry Smith PetscErrorCode ierr; 36939ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3703a40ed3dSBarry Smith 3713a40ed3dSBarry Smith PetscFunctionBegin; 3723a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 3733a40ed3dSBarry Smith PetscFunctionReturn(0); 3748965ea79SLois Curfman McInnes } 3758965ea79SLois Curfman McInnes 376637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 3778965ea79SLois Curfman McInnes { 37839ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3796849ba73SBarry Smith PetscErrorCode ierr; 380637a0070SStefano Zampini PetscInt i,len,*lrows; 381637a0070SStefano Zampini 382637a0070SStefano Zampini PetscFunctionBegin; 383637a0070SStefano Zampini /* get locally owned rows */ 384637a0070SStefano Zampini ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 385637a0070SStefano Zampini /* fix right hand side if needed */ 386637a0070SStefano Zampini if (x && b) { 38797b48c8fSBarry Smith const PetscScalar *xx; 38897b48c8fSBarry Smith PetscScalar *bb; 3898965ea79SLois Curfman McInnes 39097b48c8fSBarry Smith ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 391637a0070SStefano Zampini ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 392637a0070SStefano Zampini for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 39397b48c8fSBarry Smith ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 394637a0070SStefano Zampini ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 39597b48c8fSBarry Smith } 396637a0070SStefano Zampini ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 397e2eb51b1SBarry Smith if (diag != 0.0) { 398637a0070SStefano Zampini Vec d; 399b9679d65SBarry Smith 400637a0070SStefano Zampini ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 401637a0070SStefano Zampini ierr = VecSet(d,diag);CHKERRQ(ierr); 402637a0070SStefano Zampini ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 403637a0070SStefano Zampini ierr = VecDestroy(&d);CHKERRQ(ierr); 404b9679d65SBarry Smith } 405606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4063a40ed3dSBarry Smith PetscFunctionReturn(0); 4078965ea79SLois Curfman McInnes } 4088965ea79SLois Curfman McInnes 409cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 410cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 411cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 412cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 413cc2e6a90SBarry Smith 414dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4158965ea79SLois Curfman McInnes { 41639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 417dfbe8321SBarry Smith PetscErrorCode ierr; 418637a0070SStefano Zampini const PetscScalar *ax; 419637a0070SStefano Zampini PetscScalar *ay; 420c456f294SBarry Smith 4213a40ed3dSBarry Smith PetscFunctionBegin; 422637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 423637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 424637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 425637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 426637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 427637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 428637a0070SStefano Zampini ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4293a40ed3dSBarry Smith PetscFunctionReturn(0); 4308965ea79SLois Curfman McInnes } 4318965ea79SLois Curfman McInnes 432dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4338965ea79SLois Curfman McInnes { 43439ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 435dfbe8321SBarry Smith PetscErrorCode ierr; 436637a0070SStefano Zampini const PetscScalar *ax; 437637a0070SStefano Zampini PetscScalar *ay; 438c456f294SBarry Smith 4393a40ed3dSBarry Smith PetscFunctionBegin; 440637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 441637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 442637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 443637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 444637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 445637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 446637a0070SStefano Zampini ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4473a40ed3dSBarry Smith PetscFunctionReturn(0); 4488965ea79SLois Curfman McInnes } 4498965ea79SLois Curfman McInnes 450dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 451096963f5SLois Curfman McInnes { 452096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 453dfbe8321SBarry Smith PetscErrorCode ierr; 454637a0070SStefano Zampini const PetscScalar *ax; 455637a0070SStefano Zampini PetscScalar *ay; 456096963f5SLois Curfman McInnes 4573a40ed3dSBarry Smith PetscFunctionBegin; 458637a0070SStefano Zampini ierr = VecSet(yy,0.0);CHKERRQ(ierr); 459637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 460637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 461637a0070SStefano Zampini ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 462637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 463637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 464637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 465637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 4663a40ed3dSBarry Smith PetscFunctionReturn(0); 467096963f5SLois Curfman McInnes } 468096963f5SLois Curfman McInnes 469dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 470096963f5SLois Curfman McInnes { 471096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 472dfbe8321SBarry Smith PetscErrorCode ierr; 473637a0070SStefano Zampini const PetscScalar *ax; 474637a0070SStefano Zampini PetscScalar *ay; 475096963f5SLois Curfman McInnes 4763a40ed3dSBarry Smith PetscFunctionBegin; 4773501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 478637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 479637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 480637a0070SStefano Zampini ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 481637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 482637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 483637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 484637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 4853a40ed3dSBarry Smith PetscFunctionReturn(0); 486096963f5SLois Curfman McInnes } 487096963f5SLois Curfman McInnes 488dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 4898965ea79SLois Curfman McInnes { 49039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 491dfbe8321SBarry Smith PetscErrorCode ierr; 492637a0070SStefano Zampini PetscInt lda,len,i,n,m = A->rmap->n,radd; 49387828ca2SBarry Smith PetscScalar *x,zero = 0.0; 494637a0070SStefano Zampini const PetscScalar *av; 495ed3cc1f0SBarry Smith 4963a40ed3dSBarry Smith PetscFunctionBegin; 4972dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 4981ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 499096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 500e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 501d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 502d0f46423SBarry Smith radd = A->rmap->rstart*m; 503637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 504637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 50544cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 506637a0070SStefano Zampini x[i] = av[radd + i*lda + i]; 507096963f5SLois Curfman McInnes } 508637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 5091ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5103a40ed3dSBarry Smith PetscFunctionReturn(0); 5118965ea79SLois Curfman McInnes } 5128965ea79SLois Curfman McInnes 513dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5148965ea79SLois Curfman McInnes { 5153501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 516dfbe8321SBarry Smith PetscErrorCode ierr; 517ed3cc1f0SBarry Smith 5183a40ed3dSBarry Smith PetscFunctionBegin; 519aa482453SBarry Smith #if defined(PETSC_USE_LOG) 520d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5218965ea79SLois Curfman McInnes #endif 5228798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 5236bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5246bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 525637a0070SStefano Zampini ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 526*6947451fSStefano Zampini ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 52701b82886SBarry Smith 528bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 529dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5308baccfbdSHong Zhang 53149a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 5328baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5338572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5348572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 5358572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 536*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 537*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 538d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 539d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 5408baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5418baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5428baccfbdSHong Zhang #endif 543bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 5444222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5454222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 546bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 547bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 54852c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 54952c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 5508baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5518baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 55286aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 55386aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 554*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 555*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 556*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 557*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 558*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 559*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 5603a40ed3dSBarry Smith PetscFunctionReturn(0); 5618965ea79SLois Curfman McInnes } 56239ddd567SLois Curfman McInnes 56352c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 56452c5f739Sprj- 5659804daf3SBarry Smith #include <petscdraw.h> 5666849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 5678965ea79SLois Curfman McInnes { 56839ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 569dfbe8321SBarry Smith PetscErrorCode ierr; 5707da1fb6eSBarry Smith PetscMPIInt rank = mdn->rank; 57119fd82e9SBarry Smith PetscViewerType vtype; 572ace3abfcSBarry Smith PetscBool iascii,isdraw; 573b0a32e0cSBarry Smith PetscViewer sviewer; 574f3ef73ceSBarry Smith PetscViewerFormat format; 5758965ea79SLois Curfman McInnes 5763a40ed3dSBarry Smith PetscFunctionBegin; 577251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 578251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 57932077d6dSBarry Smith if (iascii) { 580b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 581b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 582456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 5834e220ebcSLois Curfman McInnes MatInfo info; 584888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 5851575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 5867b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 587b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 5881575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 589637a0070SStefano Zampini ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 5903a40ed3dSBarry Smith PetscFunctionReturn(0); 591fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 5923a40ed3dSBarry Smith PetscFunctionReturn(0); 5938965ea79SLois Curfman McInnes } 594f1af5d2fSBarry Smith } else if (isdraw) { 595b0a32e0cSBarry Smith PetscDraw draw; 596ace3abfcSBarry Smith PetscBool isnull; 597f1af5d2fSBarry Smith 598b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 599b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 600f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 601f1af5d2fSBarry Smith } 60277ed5343SBarry Smith 6037da1fb6eSBarry Smith { 6048965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6058965ea79SLois Curfman McInnes Mat A; 606d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 607ba8c8a56SBarry Smith PetscInt *cols; 608ba8c8a56SBarry Smith PetscScalar *vals; 6098965ea79SLois Curfman McInnes 610ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 6118965ea79SLois Curfman McInnes if (!rank) { 612f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 6133a40ed3dSBarry Smith } else { 614f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 6158965ea79SLois Curfman McInnes } 6167adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 617878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 6180298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 6193bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 6208965ea79SLois Curfman McInnes 62139ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 62239ddd567SLois Curfman McInnes but it's quick for now */ 62351022da4SBarry Smith A->insertmode = INSERT_VALUES; 6242205254eSKarl Rupp 6252205254eSKarl Rupp row = mat->rmap->rstart; 6262205254eSKarl Rupp m = mdn->A->rmap->n; 6278965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 628ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 629ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 630ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 63139ddd567SLois Curfman McInnes row++; 6328965ea79SLois Curfman McInnes } 6338965ea79SLois Curfman McInnes 6346d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6356d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6363f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 637b9b97703SBarry Smith if (!rank) { 6381a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 6397da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 6408965ea79SLois Curfman McInnes } 6413f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 642b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6436bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 6448965ea79SLois Curfman McInnes } 6453a40ed3dSBarry Smith PetscFunctionReturn(0); 6468965ea79SLois Curfman McInnes } 6478965ea79SLois Curfman McInnes 648dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 6498965ea79SLois Curfman McInnes { 650dfbe8321SBarry Smith PetscErrorCode ierr; 651ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 6528965ea79SLois Curfman McInnes 653433994e6SBarry Smith PetscFunctionBegin; 654251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 655251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 656251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 657251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 6580f5bd95cSBarry Smith 65932077d6dSBarry Smith if (iascii || issocket || isdraw) { 660f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 6610f5bd95cSBarry Smith } else if (isbinary) { 6628491ab44SLisandro Dalcin ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 66311aeaf0aSBarry Smith } 6643a40ed3dSBarry Smith PetscFunctionReturn(0); 6658965ea79SLois Curfman McInnes } 6668965ea79SLois Curfman McInnes 667dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 6688965ea79SLois Curfman McInnes { 6693501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 6703501a2bdSLois Curfman McInnes Mat mdn = mat->A; 671dfbe8321SBarry Smith PetscErrorCode ierr; 6723966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 6738965ea79SLois Curfman McInnes 6743a40ed3dSBarry Smith PetscFunctionBegin; 6754e220ebcSLois Curfman McInnes info->block_size = 1.0; 6762205254eSKarl Rupp 6774e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 6782205254eSKarl Rupp 6794e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6804e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6818965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6824e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 6834e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 6844e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 6854e220ebcSLois Curfman McInnes info->memory = isend[3]; 6864e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 6878965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 6883966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 6892205254eSKarl Rupp 6904e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6914e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 6924e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 6934e220ebcSLois Curfman McInnes info->memory = irecv[3]; 6944e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 6958965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 6963966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 6972205254eSKarl Rupp 6984e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 6994e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7004e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7014e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7024e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7038965ea79SLois Curfman McInnes } 7044e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7054e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7064e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7073a40ed3dSBarry Smith PetscFunctionReturn(0); 7088965ea79SLois Curfman McInnes } 7098965ea79SLois Curfman McInnes 710ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 7118965ea79SLois Curfman McInnes { 71239ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 713dfbe8321SBarry Smith PetscErrorCode ierr; 7148965ea79SLois Curfman McInnes 7153a40ed3dSBarry Smith PetscFunctionBegin; 71612c028f9SKris Buschelman switch (op) { 717512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 71812c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 71912c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 72043674050SBarry Smith MatCheckPreallocated(A,1); 7214e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 72212c028f9SKris Buschelman break; 72312c028f9SKris Buschelman case MAT_ROW_ORIENTED: 72443674050SBarry Smith MatCheckPreallocated(A,1); 7254e0d8c25SBarry Smith a->roworiented = flg; 7264e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 72712c028f9SKris Buschelman break; 7284e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 72913fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 73012c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 731071fcb05SBarry Smith case MAT_SORTED_FULL: 732290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 73312c028f9SKris Buschelman break; 73412c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 7354e0d8c25SBarry Smith a->donotstash = flg; 73612c028f9SKris Buschelman break; 73777e54ba9SKris Buschelman case MAT_SYMMETRIC: 73877e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7399a4540c5SBarry Smith case MAT_HERMITIAN: 7409a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 741600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 7425d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 743290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 74477e54ba9SKris Buschelman break; 74512c028f9SKris Buschelman default: 746e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 7473a40ed3dSBarry Smith } 7483a40ed3dSBarry Smith PetscFunctionReturn(0); 7498965ea79SLois Curfman McInnes } 7508965ea79SLois Curfman McInnes 751dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7525b2fa520SLois Curfman McInnes { 7535b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 754637a0070SStefano Zampini const PetscScalar *l; 755637a0070SStefano Zampini PetscScalar x,*v,*vv,*r; 756dfbe8321SBarry Smith PetscErrorCode ierr; 757637a0070SStefano Zampini PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 7585b2fa520SLois Curfman McInnes 7595b2fa520SLois Curfman McInnes PetscFunctionBegin; 760637a0070SStefano Zampini ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 761637a0070SStefano Zampini ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 76272d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 7635b2fa520SLois Curfman McInnes if (ll) { 76472d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 765637a0070SStefano Zampini if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 766bca11509SBarry Smith ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 7675b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 7685b2fa520SLois Curfman McInnes x = l[i]; 769637a0070SStefano Zampini v = vv + i; 770637a0070SStefano Zampini for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 7715b2fa520SLois Curfman McInnes } 772bca11509SBarry Smith ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 773637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 7745b2fa520SLois Curfman McInnes } 7755b2fa520SLois Curfman McInnes if (rr) { 776637a0070SStefano Zampini const PetscScalar *ar; 777637a0070SStefano Zampini 778175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 779e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 780637a0070SStefano Zampini ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 781637a0070SStefano Zampini ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 782637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 783637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 784637a0070SStefano Zampini ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 7855b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 7865b2fa520SLois Curfman McInnes x = r[i]; 787637a0070SStefano Zampini v = vv + i*lda; 7882205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 7895b2fa520SLois Curfman McInnes } 790637a0070SStefano Zampini ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 791637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 7925b2fa520SLois Curfman McInnes } 793637a0070SStefano Zampini ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 7945b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 7955b2fa520SLois Curfman McInnes } 7965b2fa520SLois Curfman McInnes 797dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 798096963f5SLois Curfman McInnes { 7993501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 800dfbe8321SBarry Smith PetscErrorCode ierr; 80113f74950SBarry Smith PetscInt i,j; 802329f5518SBarry Smith PetscReal sum = 0.0; 803637a0070SStefano Zampini const PetscScalar *av,*v; 8043501a2bdSLois Curfman McInnes 8053a40ed3dSBarry Smith PetscFunctionBegin; 806637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 807637a0070SStefano Zampini v = av; 8083501a2bdSLois Curfman McInnes if (mdn->size == 1) { 809064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 8103501a2bdSLois Curfman McInnes } else { 8113501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 812d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 813329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 8143501a2bdSLois Curfman McInnes } 815b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8168f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 817dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 8183a40ed3dSBarry Smith } else if (type == NORM_1) { 819329f5518SBarry Smith PetscReal *tmp,*tmp2; 820580bdb30SBarry Smith ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 821064f8208SBarry Smith *nrm = 0.0; 822637a0070SStefano Zampini v = av; 823d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 824d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 82567e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8263501a2bdSLois Curfman McInnes } 8273501a2bdSLois Curfman McInnes } 828b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 829d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 830064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8313501a2bdSLois Curfman McInnes } 8328627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 833d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 8343a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 835329f5518SBarry Smith PetscReal ntemp; 8363501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 837b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 838ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 8393501a2bdSLois Curfman McInnes } 840637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 8413a40ed3dSBarry Smith PetscFunctionReturn(0); 8423501a2bdSLois Curfman McInnes } 8433501a2bdSLois Curfman McInnes 844fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 8453501a2bdSLois Curfman McInnes { 8463501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 8473501a2bdSLois Curfman McInnes Mat B; 848d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 8496849ba73SBarry Smith PetscErrorCode ierr; 850637a0070SStefano Zampini PetscInt j,i,lda; 85187828ca2SBarry Smith PetscScalar *v; 8523501a2bdSLois Curfman McInnes 8533a40ed3dSBarry Smith PetscFunctionBegin; 854cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 855ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 856d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 8577adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 8580298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 859637a0070SStefano Zampini } else B = *matout; 8603501a2bdSLois Curfman McInnes 861637a0070SStefano Zampini m = a->A->rmap->n; n = a->A->cmap->n; 862637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 863637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 864785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 8653501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8661acff37aSSatish Balay for (j=0; j<n; j++) { 8673501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 868637a0070SStefano Zampini v += lda; 8693501a2bdSLois Curfman McInnes } 870637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 871606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 8726d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8736d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 874cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 8753501a2bdSLois Curfman McInnes *matout = B; 8763501a2bdSLois Curfman McInnes } else { 87728be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 8783501a2bdSLois Curfman McInnes } 8793a40ed3dSBarry Smith PetscFunctionReturn(0); 880096963f5SLois Curfman McInnes } 881096963f5SLois Curfman McInnes 8826849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 88352c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 8848965ea79SLois Curfman McInnes 8854994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 886273d9f13SBarry Smith { 887dfbe8321SBarry Smith PetscErrorCode ierr; 888273d9f13SBarry Smith 889273d9f13SBarry Smith PetscFunctionBegin; 89018992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 89118992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 89218992e5dSStefano Zampini if (!A->preallocated) { 893273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 89418992e5dSStefano Zampini } 895273d9f13SBarry Smith PetscFunctionReturn(0); 896273d9f13SBarry Smith } 897273d9f13SBarry Smith 898488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 899488007eeSBarry Smith { 900488007eeSBarry Smith PetscErrorCode ierr; 901488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 902488007eeSBarry Smith 903488007eeSBarry Smith PetscFunctionBegin; 904488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 905488007eeSBarry Smith PetscFunctionReturn(0); 906488007eeSBarry Smith } 907488007eeSBarry Smith 9087087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 909ba337c44SJed Brown { 910ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 911ba337c44SJed Brown PetscErrorCode ierr; 912ba337c44SJed Brown 913ba337c44SJed Brown PetscFunctionBegin; 914ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 915ba337c44SJed Brown PetscFunctionReturn(0); 916ba337c44SJed Brown } 917ba337c44SJed Brown 918ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 919ba337c44SJed Brown { 920ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 921ba337c44SJed Brown PetscErrorCode ierr; 922ba337c44SJed Brown 923ba337c44SJed Brown PetscFunctionBegin; 924ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 925ba337c44SJed Brown PetscFunctionReturn(0); 926ba337c44SJed Brown } 927ba337c44SJed Brown 928ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 929ba337c44SJed Brown { 930ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 931ba337c44SJed Brown PetscErrorCode ierr; 932ba337c44SJed Brown 933ba337c44SJed Brown PetscFunctionBegin; 934ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 935ba337c44SJed Brown PetscFunctionReturn(0); 936ba337c44SJed Brown } 937ba337c44SJed Brown 93849a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 93949a6ff4bSBarry Smith { 94049a6ff4bSBarry Smith PetscErrorCode ierr; 941637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*) A->data; 94249a6ff4bSBarry Smith 94349a6ff4bSBarry Smith PetscFunctionBegin; 944637a0070SStefano Zampini if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 945637a0070SStefano Zampini if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 946637a0070SStefano Zampini ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 94749a6ff4bSBarry Smith PetscFunctionReturn(0); 94849a6ff4bSBarry Smith } 94949a6ff4bSBarry Smith 95052c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 95152c5f739Sprj- 9520716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 9530716a85fSBarry Smith { 9540716a85fSBarry Smith PetscErrorCode ierr; 9550716a85fSBarry Smith PetscInt i,n; 9560716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 9570716a85fSBarry Smith PetscReal *work; 9580716a85fSBarry Smith 9590716a85fSBarry Smith PetscFunctionBegin; 9600298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 961785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 9620716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 9630716a85fSBarry Smith if (type == NORM_2) { 9640716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 9650716a85fSBarry Smith } 9660716a85fSBarry Smith if (type == NORM_INFINITY) { 967b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 9680716a85fSBarry Smith } else { 969b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 9700716a85fSBarry Smith } 9710716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 9720716a85fSBarry Smith if (type == NORM_2) { 9738f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 9740716a85fSBarry Smith } 9750716a85fSBarry Smith PetscFunctionReturn(0); 9760716a85fSBarry Smith } 9770716a85fSBarry Smith 978637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 979*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 980*6947451fSStefano Zampini { 981*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 982*6947451fSStefano Zampini PetscErrorCode ierr; 983*6947451fSStefano Zampini PetscInt lda; 984*6947451fSStefano Zampini 985*6947451fSStefano Zampini PetscFunctionBegin; 986*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 987*6947451fSStefano Zampini if (!a->cvec) { 988*6947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 989*6947451fSStefano Zampini } 990*6947451fSStefano Zampini a->vecinuse = col + 1; 991*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 992*6947451fSStefano Zampini ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 993*6947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 994*6947451fSStefano Zampini *v = a->cvec; 995*6947451fSStefano Zampini PetscFunctionReturn(0); 996*6947451fSStefano Zampini } 997*6947451fSStefano Zampini 998*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 999*6947451fSStefano Zampini { 1000*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1001*6947451fSStefano Zampini PetscErrorCode ierr; 1002*6947451fSStefano Zampini 1003*6947451fSStefano Zampini PetscFunctionBegin; 1004*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1005*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1006*6947451fSStefano Zampini a->vecinuse = 0; 1007*6947451fSStefano Zampini ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1008*6947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1009*6947451fSStefano Zampini *v = NULL; 1010*6947451fSStefano Zampini PetscFunctionReturn(0); 1011*6947451fSStefano Zampini } 1012*6947451fSStefano Zampini 1013*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1014*6947451fSStefano Zampini { 1015*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1016*6947451fSStefano Zampini PetscInt lda; 1017*6947451fSStefano Zampini PetscErrorCode ierr; 1018*6947451fSStefano Zampini 1019*6947451fSStefano Zampini PetscFunctionBegin; 1020*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1021*6947451fSStefano Zampini if (!a->cvec) { 1022*6947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1023*6947451fSStefano Zampini } 1024*6947451fSStefano Zampini a->vecinuse = col + 1; 1025*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1026*6947451fSStefano Zampini ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1027*6947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1028*6947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1029*6947451fSStefano Zampini *v = a->cvec; 1030*6947451fSStefano Zampini PetscFunctionReturn(0); 1031*6947451fSStefano Zampini } 1032*6947451fSStefano Zampini 1033*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1034*6947451fSStefano Zampini { 1035*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1036*6947451fSStefano Zampini PetscErrorCode ierr; 1037*6947451fSStefano Zampini 1038*6947451fSStefano Zampini PetscFunctionBegin; 1039*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1040*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1041*6947451fSStefano Zampini a->vecinuse = 0; 1042*6947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1043*6947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1044*6947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1045*6947451fSStefano Zampini *v = NULL; 1046*6947451fSStefano Zampini PetscFunctionReturn(0); 1047*6947451fSStefano Zampini } 1048*6947451fSStefano Zampini 1049*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1050*6947451fSStefano Zampini { 1051*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1052*6947451fSStefano Zampini PetscErrorCode ierr; 1053*6947451fSStefano Zampini PetscInt lda; 1054*6947451fSStefano Zampini 1055*6947451fSStefano Zampini PetscFunctionBegin; 1056*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1057*6947451fSStefano Zampini if (!a->cvec) { 1058*6947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1059*6947451fSStefano Zampini } 1060*6947451fSStefano Zampini a->vecinuse = col + 1; 1061*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1062*6947451fSStefano Zampini ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1063*6947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1064*6947451fSStefano Zampini *v = a->cvec; 1065*6947451fSStefano Zampini PetscFunctionReturn(0); 1066*6947451fSStefano Zampini } 1067*6947451fSStefano Zampini 1068*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1069*6947451fSStefano Zampini { 1070*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1071*6947451fSStefano Zampini PetscErrorCode ierr; 1072*6947451fSStefano Zampini 1073*6947451fSStefano Zampini PetscFunctionBegin; 1074*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1075*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1076*6947451fSStefano Zampini a->vecinuse = 0; 1077*6947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1078*6947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 1079*6947451fSStefano Zampini *v = NULL; 1080*6947451fSStefano Zampini PetscFunctionReturn(0); 1081*6947451fSStefano Zampini } 1082*6947451fSStefano Zampini 1083637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1084637a0070SStefano Zampini { 1085637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1086637a0070SStefano Zampini PetscErrorCode ierr; 1087637a0070SStefano Zampini 1088637a0070SStefano Zampini PetscFunctionBegin; 1089*6947451fSStefano Zampini if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1090637a0070SStefano Zampini ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1091637a0070SStefano Zampini PetscFunctionReturn(0); 1092637a0070SStefano Zampini } 1093637a0070SStefano Zampini 1094637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1095637a0070SStefano Zampini { 1096637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1097637a0070SStefano Zampini PetscErrorCode ierr; 1098637a0070SStefano Zampini 1099637a0070SStefano Zampini PetscFunctionBegin; 1100*6947451fSStefano Zampini if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1101637a0070SStefano Zampini ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1102637a0070SStefano Zampini PetscFunctionReturn(0); 1103637a0070SStefano Zampini } 1104637a0070SStefano Zampini 1105637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1106637a0070SStefano Zampini { 1107637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1108637a0070SStefano Zampini PetscErrorCode ierr; 1109637a0070SStefano Zampini 1110637a0070SStefano Zampini PetscFunctionBegin; 1111637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1112637a0070SStefano Zampini PetscFunctionReturn(0); 1113637a0070SStefano Zampini } 1114637a0070SStefano Zampini 1115637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1116637a0070SStefano Zampini { 1117637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1118637a0070SStefano Zampini PetscErrorCode ierr; 1119637a0070SStefano Zampini 1120637a0070SStefano Zampini PetscFunctionBegin; 1121637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1122637a0070SStefano Zampini PetscFunctionReturn(0); 1123637a0070SStefano Zampini } 1124637a0070SStefano Zampini 1125637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1126637a0070SStefano Zampini { 1127637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1128637a0070SStefano Zampini PetscErrorCode ierr; 1129637a0070SStefano Zampini 1130637a0070SStefano Zampini PetscFunctionBegin; 1131637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1132637a0070SStefano Zampini PetscFunctionReturn(0); 1133637a0070SStefano Zampini } 1134637a0070SStefano Zampini 1135637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1136637a0070SStefano Zampini { 1137637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1138637a0070SStefano Zampini PetscErrorCode ierr; 1139637a0070SStefano Zampini 1140637a0070SStefano Zampini PetscFunctionBegin; 1141637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1142637a0070SStefano Zampini PetscFunctionReturn(0); 1143637a0070SStefano Zampini } 1144637a0070SStefano Zampini 1145637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1146637a0070SStefano Zampini { 1147637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1148637a0070SStefano Zampini PetscErrorCode ierr; 1149637a0070SStefano Zampini 1150637a0070SStefano Zampini PetscFunctionBegin; 1151637a0070SStefano Zampini ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1152637a0070SStefano Zampini PetscFunctionReturn(0); 1153637a0070SStefano Zampini } 1154637a0070SStefano Zampini 1155637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1156637a0070SStefano Zampini { 1157637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1158637a0070SStefano Zampini PetscErrorCode ierr; 1159637a0070SStefano Zampini 1160637a0070SStefano Zampini PetscFunctionBegin; 1161637a0070SStefano Zampini ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1162637a0070SStefano Zampini PetscFunctionReturn(0); 1163637a0070SStefano Zampini } 1164637a0070SStefano Zampini 1165*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1166*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1167*6947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1168*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1169*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1170*6947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1171*6947451fSStefano Zampini 1172637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1173637a0070SStefano Zampini { 1174637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1175637a0070SStefano Zampini PetscErrorCode ierr; 1176637a0070SStefano Zampini 1177637a0070SStefano Zampini PetscFunctionBegin; 1178*6947451fSStefano Zampini if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1179637a0070SStefano Zampini if (d->A) { 1180637a0070SStefano Zampini ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1181637a0070SStefano Zampini } 1182637a0070SStefano Zampini mat->boundtocpu = bind; 1183*6947451fSStefano Zampini if (!bind) { 1184*6947451fSStefano Zampini PetscBool iscuda; 1185*6947451fSStefano Zampini 1186*6947451fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 1187*6947451fSStefano Zampini if (!iscuda) { 1188*6947451fSStefano Zampini ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 1189*6947451fSStefano Zampini } 1190*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1191*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 1192*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1193*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 1194*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1195*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 1196*6947451fSStefano Zampini } else { 1197*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1198*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1199*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1200*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1201*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1202*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 1203*6947451fSStefano Zampini } 1204637a0070SStefano Zampini PetscFunctionReturn(0); 1205637a0070SStefano Zampini } 1206637a0070SStefano Zampini 1207637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1208637a0070SStefano Zampini { 1209637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1210637a0070SStefano Zampini PetscErrorCode ierr; 1211637a0070SStefano Zampini PetscBool iscuda; 1212637a0070SStefano Zampini 1213637a0070SStefano Zampini PetscFunctionBegin; 1214637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1215637a0070SStefano Zampini if (!iscuda) PetscFunctionReturn(0); 1216637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1217637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1218637a0070SStefano Zampini if (!d->A) { 1219637a0070SStefano Zampini ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1220637a0070SStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1221637a0070SStefano Zampini ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1222637a0070SStefano Zampini } 1223637a0070SStefano Zampini ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1224637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1225637a0070SStefano Zampini A->preallocated = PETSC_TRUE; 1226637a0070SStefano Zampini PetscFunctionReturn(0); 1227637a0070SStefano Zampini } 1228637a0070SStefano Zampini #endif 1229637a0070SStefano Zampini 123073a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 123173a71a0fSBarry Smith { 123273a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 123373a71a0fSBarry Smith PetscErrorCode ierr; 123473a71a0fSBarry Smith 123573a71a0fSBarry Smith PetscFunctionBegin; 1236637a0070SStefano Zampini ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 123773a71a0fSBarry Smith PetscFunctionReturn(0); 123873a71a0fSBarry Smith } 123973a71a0fSBarry Smith 124052c5f739Sprj- PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1241fd4e9aacSBarry Smith 12423b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 12433b49f96aSBarry Smith { 12443b49f96aSBarry Smith PetscFunctionBegin; 12453b49f96aSBarry Smith *missing = PETSC_FALSE; 12463b49f96aSBarry Smith PetscFunctionReturn(0); 12473b49f96aSBarry Smith } 12483b49f96aSBarry Smith 12494222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1250cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1251cc48ffa7SToby Isaac 12528965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 125309dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 125409dc0095SBarry Smith MatGetRow_MPIDense, 125509dc0095SBarry Smith MatRestoreRow_MPIDense, 125609dc0095SBarry Smith MatMult_MPIDense, 125797304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 12587c922b88SBarry Smith MatMultTranspose_MPIDense, 12597c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 12608965ea79SLois Curfman McInnes 0, 126109dc0095SBarry Smith 0, 126209dc0095SBarry Smith 0, 126397304618SKris Buschelman /* 10*/ 0, 126409dc0095SBarry Smith 0, 126509dc0095SBarry Smith 0, 126609dc0095SBarry Smith 0, 126709dc0095SBarry Smith MatTranspose_MPIDense, 126897304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 12696e4ee0c6SHong Zhang MatEqual_MPIDense, 127009dc0095SBarry Smith MatGetDiagonal_MPIDense, 12715b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 127209dc0095SBarry Smith MatNorm_MPIDense, 127397304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 127409dc0095SBarry Smith MatAssemblyEnd_MPIDense, 127509dc0095SBarry Smith MatSetOption_MPIDense, 127609dc0095SBarry Smith MatZeroEntries_MPIDense, 1277d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1278919b68f7SBarry Smith 0, 127901b82886SBarry Smith 0, 128001b82886SBarry Smith 0, 128101b82886SBarry Smith 0, 12824994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1283273d9f13SBarry Smith 0, 128409dc0095SBarry Smith 0, 1285c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 12868c778c55SBarry Smith 0, 1287d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 128809dc0095SBarry Smith 0, 128909dc0095SBarry Smith 0, 129009dc0095SBarry Smith 0, 129109dc0095SBarry Smith 0, 1292d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 12937dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 129409dc0095SBarry Smith 0, 129509dc0095SBarry Smith MatGetValues_MPIDense, 129609dc0095SBarry Smith 0, 1297d519adbfSMatthew Knepley /* 44*/ 0, 129809dc0095SBarry Smith MatScale_MPIDense, 12997d68702bSBarry Smith MatShift_Basic, 130009dc0095SBarry Smith 0, 130109dc0095SBarry Smith 0, 130273a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 130309dc0095SBarry Smith 0, 130409dc0095SBarry Smith 0, 130509dc0095SBarry Smith 0, 130609dc0095SBarry Smith 0, 1307d519adbfSMatthew Knepley /* 54*/ 0, 130809dc0095SBarry Smith 0, 130909dc0095SBarry Smith 0, 131009dc0095SBarry Smith 0, 131109dc0095SBarry Smith 0, 13127dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1313b9b97703SBarry Smith MatDestroy_MPIDense, 1314b9b97703SBarry Smith MatView_MPIDense, 1315357abbc8SBarry Smith 0, 131697304618SKris Buschelman 0, 1317d519adbfSMatthew Knepley /* 64*/ 0, 131897304618SKris Buschelman 0, 131997304618SKris Buschelman 0, 132097304618SKris Buschelman 0, 132197304618SKris Buschelman 0, 1322d519adbfSMatthew Knepley /* 69*/ 0, 132397304618SKris Buschelman 0, 132497304618SKris Buschelman 0, 132597304618SKris Buschelman 0, 132697304618SKris Buschelman 0, 1327d519adbfSMatthew Knepley /* 74*/ 0, 132897304618SKris Buschelman 0, 132997304618SKris Buschelman 0, 133097304618SKris Buschelman 0, 133197304618SKris Buschelman 0, 1332d519adbfSMatthew Knepley /* 79*/ 0, 133397304618SKris Buschelman 0, 133497304618SKris Buschelman 0, 133597304618SKris Buschelman 0, 13365bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1337865e5f61SKris Buschelman 0, 1338865e5f61SKris Buschelman 0, 1339865e5f61SKris Buschelman 0, 1340865e5f61SKris Buschelman 0, 1341865e5f61SKris Buschelman 0, 13424222ddf1SHong Zhang /* 89*/ 0, 13434222ddf1SHong Zhang 0, 1344fd4e9aacSBarry Smith MatMatMultNumeric_MPIDense, 13452fbe02b9SBarry Smith 0, 1346ba337c44SJed Brown 0, 1347d519adbfSMatthew Knepley /* 94*/ 0, 13484222ddf1SHong Zhang 0, 13494222ddf1SHong Zhang 0, 1350cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1351ba337c44SJed Brown 0, 13524222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1353ba337c44SJed Brown 0, 1354ba337c44SJed Brown 0, 1355ba337c44SJed Brown MatConjugate_MPIDense, 1356ba337c44SJed Brown 0, 1357ba337c44SJed Brown /*104*/ 0, 1358ba337c44SJed Brown MatRealPart_MPIDense, 1359ba337c44SJed Brown MatImaginaryPart_MPIDense, 136086d161a7SShri Abhyankar 0, 136186d161a7SShri Abhyankar 0, 136286d161a7SShri Abhyankar /*109*/ 0, 136386d161a7SShri Abhyankar 0, 136486d161a7SShri Abhyankar 0, 136549a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 13663b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 136786d161a7SShri Abhyankar /*114*/ 0, 136886d161a7SShri Abhyankar 0, 136986d161a7SShri Abhyankar 0, 137086d161a7SShri Abhyankar 0, 137186d161a7SShri Abhyankar 0, 137286d161a7SShri Abhyankar /*119*/ 0, 137386d161a7SShri Abhyankar 0, 137486d161a7SShri Abhyankar 0, 13750716a85fSBarry Smith 0, 13760716a85fSBarry Smith 0, 13770716a85fSBarry Smith /*124*/ 0, 13783964eb88SJed Brown MatGetColumnNorms_MPIDense, 13793964eb88SJed Brown 0, 13803964eb88SJed Brown 0, 13813964eb88SJed Brown 0, 13823964eb88SJed Brown /*129*/ 0, 13834222ddf1SHong Zhang 0, 13844222ddf1SHong Zhang 0, 1385cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 13863964eb88SJed Brown 0, 13873964eb88SJed Brown /*134*/ 0, 13883964eb88SJed Brown 0, 13893964eb88SJed Brown 0, 13903964eb88SJed Brown 0, 13913964eb88SJed Brown 0, 13923964eb88SJed Brown /*139*/ 0, 1393f9426fe0SMark Adams 0, 139494e2cb23SJakub Kruzik 0, 139594e2cb23SJakub Kruzik 0, 139694e2cb23SJakub Kruzik 0, 13974222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 13984222ddf1SHong Zhang /*145*/ 0, 13994222ddf1SHong Zhang 0, 14004222ddf1SHong Zhang 0 1401ba337c44SJed Brown }; 14028965ea79SLois Curfman McInnes 14037087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1404a23d5eceSKris Buschelman { 1405637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1406637a0070SStefano Zampini PetscBool iscuda; 1407dfbe8321SBarry Smith PetscErrorCode ierr; 1408a23d5eceSKris Buschelman 1409a23d5eceSKris Buschelman PetscFunctionBegin; 141034ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 141134ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1412637a0070SStefano Zampini if (!a->A) { 1413f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 14143bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1415637a0070SStefano Zampini ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1416637a0070SStefano Zampini } 1417637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1418637a0070SStefano Zampini ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1419637a0070SStefano Zampini ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1420637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 1421a23d5eceSKris Buschelman PetscFunctionReturn(0); 1422a23d5eceSKris Buschelman } 1423a23d5eceSKris Buschelman 142465b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1425cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 14268baccfbdSHong Zhang { 14278ea901baSHong Zhang Mat mat_elemental; 14288ea901baSHong Zhang PetscErrorCode ierr; 142932d7a744SHong Zhang PetscScalar *v; 143032d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 14318ea901baSHong Zhang 14328baccfbdSHong Zhang PetscFunctionBegin; 1433378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1434378336b6SHong Zhang mat_elemental = *newmat; 1435378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1436378336b6SHong Zhang } else { 1437378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1438378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1439378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1440378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 144132d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1442378336b6SHong Zhang } 1443378336b6SHong Zhang 144432d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 144532d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 144632d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 14478ea901baSHong Zhang 1448637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 144932d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 145032d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 14518ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14528ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 145332d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 145432d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 14558ea901baSHong Zhang 1456511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 145728be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 14588ea901baSHong Zhang } else { 14598ea901baSHong Zhang *newmat = mat_elemental; 14608ea901baSHong Zhang } 14618baccfbdSHong Zhang PetscFunctionReturn(0); 14628baccfbdSHong Zhang } 146365b80a83SHong Zhang #endif 14648baccfbdSHong Zhang 1465af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 146686aefd0dSHong Zhang { 146786aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 146886aefd0dSHong Zhang PetscErrorCode ierr; 146986aefd0dSHong Zhang 147086aefd0dSHong Zhang PetscFunctionBegin; 147186aefd0dSHong Zhang ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 147286aefd0dSHong Zhang PetscFunctionReturn(0); 147386aefd0dSHong Zhang } 147486aefd0dSHong Zhang 1475af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 147686aefd0dSHong Zhang { 147786aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 147886aefd0dSHong Zhang PetscErrorCode ierr; 147986aefd0dSHong Zhang 148086aefd0dSHong Zhang PetscFunctionBegin; 148186aefd0dSHong Zhang ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 148286aefd0dSHong Zhang PetscFunctionReturn(0); 148386aefd0dSHong Zhang } 148486aefd0dSHong Zhang 148594e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 148694e2cb23SJakub Kruzik { 148794e2cb23SJakub Kruzik PetscErrorCode ierr; 148894e2cb23SJakub Kruzik Mat_MPIDense *mat; 148994e2cb23SJakub Kruzik PetscInt m,nloc,N; 149094e2cb23SJakub Kruzik 149194e2cb23SJakub Kruzik PetscFunctionBegin; 149294e2cb23SJakub Kruzik ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 149394e2cb23SJakub Kruzik ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 149494e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 149594e2cb23SJakub Kruzik PetscInt sum; 149694e2cb23SJakub Kruzik 149794e2cb23SJakub Kruzik if (n == PETSC_DECIDE) { 149894e2cb23SJakub Kruzik ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 149994e2cb23SJakub Kruzik } 150094e2cb23SJakub Kruzik /* Check sum(n) = N */ 150194e2cb23SJakub Kruzik ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 150294e2cb23SJakub Kruzik if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 150394e2cb23SJakub Kruzik 150494e2cb23SJakub Kruzik ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 150594e2cb23SJakub Kruzik } 150694e2cb23SJakub Kruzik 150794e2cb23SJakub Kruzik /* numeric phase */ 150894e2cb23SJakub Kruzik mat = (Mat_MPIDense*)(*outmat)->data; 150994e2cb23SJakub Kruzik ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 151094e2cb23SJakub Kruzik ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 151194e2cb23SJakub Kruzik ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 151294e2cb23SJakub Kruzik PetscFunctionReturn(0); 151394e2cb23SJakub Kruzik } 151494e2cb23SJakub Kruzik 1515637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1516637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1517637a0070SStefano Zampini { 1518637a0070SStefano Zampini Mat B; 1519637a0070SStefano Zampini Mat_MPIDense *m; 1520637a0070SStefano Zampini PetscErrorCode ierr; 1521637a0070SStefano Zampini 1522637a0070SStefano Zampini PetscFunctionBegin; 1523637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1524637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1525637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1526637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1527637a0070SStefano Zampini } 1528637a0070SStefano Zampini 1529637a0070SStefano Zampini B = *newmat; 1530637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1531637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1532637a0070SStefano Zampini ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1533637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1534637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1535637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1536637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1537637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1538637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1539637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1540637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1541637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1542637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1543637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1544637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1545637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1546637a0070SStefano Zampini if (m->A) { 1547637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1548637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1549637a0070SStefano Zampini } 1550637a0070SStefano Zampini B->ops->bindtocpu = NULL; 1551637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_CPU; 1552637a0070SStefano Zampini PetscFunctionReturn(0); 1553637a0070SStefano Zampini } 1554637a0070SStefano Zampini 1555637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1556637a0070SStefano Zampini { 1557637a0070SStefano Zampini Mat B; 1558637a0070SStefano Zampini Mat_MPIDense *m; 1559637a0070SStefano Zampini PetscErrorCode ierr; 1560637a0070SStefano Zampini 1561637a0070SStefano Zampini PetscFunctionBegin; 1562637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1563637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1564637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1565637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1566637a0070SStefano Zampini } 1567637a0070SStefano Zampini 1568637a0070SStefano Zampini B = *newmat; 1569637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1570637a0070SStefano Zampini ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1571637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1572637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1573637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1574637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1575637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1576637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1577637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1578637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1579637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1580637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1581637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1582637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1583637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1584637a0070SStefano Zampini if (m->A) { 1585637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1586637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1587637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_BOTH; 1588637a0070SStefano Zampini } else { 1589637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1590637a0070SStefano Zampini } 1591637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1592637a0070SStefano Zampini 1593637a0070SStefano Zampini B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1594637a0070SStefano Zampini PetscFunctionReturn(0); 1595637a0070SStefano Zampini } 1596637a0070SStefano Zampini #endif 1597637a0070SStefano Zampini 1598*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1599*6947451fSStefano Zampini { 1600*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1601*6947451fSStefano Zampini PetscErrorCode ierr; 1602*6947451fSStefano Zampini PetscInt lda; 1603*6947451fSStefano Zampini 1604*6947451fSStefano Zampini PetscFunctionBegin; 1605*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1606*6947451fSStefano Zampini if (!a->cvec) { 1607*6947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1608*6947451fSStefano Zampini } 1609*6947451fSStefano Zampini a->vecinuse = col + 1; 1610*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1611*6947451fSStefano Zampini ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1612*6947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1613*6947451fSStefano Zampini *v = a->cvec; 1614*6947451fSStefano Zampini PetscFunctionReturn(0); 1615*6947451fSStefano Zampini } 1616*6947451fSStefano Zampini 1617*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1618*6947451fSStefano Zampini { 1619*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1620*6947451fSStefano Zampini PetscErrorCode ierr; 1621*6947451fSStefano Zampini 1622*6947451fSStefano Zampini PetscFunctionBegin; 1623*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1624*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1625*6947451fSStefano Zampini a->vecinuse = 0; 1626*6947451fSStefano Zampini ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1627*6947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1628*6947451fSStefano Zampini *v = NULL; 1629*6947451fSStefano Zampini PetscFunctionReturn(0); 1630*6947451fSStefano Zampini } 1631*6947451fSStefano Zampini 1632*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1633*6947451fSStefano Zampini { 1634*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1635*6947451fSStefano Zampini PetscErrorCode ierr; 1636*6947451fSStefano Zampini PetscInt lda; 1637*6947451fSStefano Zampini 1638*6947451fSStefano Zampini PetscFunctionBegin; 1639*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1640*6947451fSStefano Zampini if (!a->cvec) { 1641*6947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1642*6947451fSStefano Zampini } 1643*6947451fSStefano Zampini a->vecinuse = col + 1; 1644*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1645*6947451fSStefano Zampini ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1646*6947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1647*6947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 1648*6947451fSStefano Zampini *v = a->cvec; 1649*6947451fSStefano Zampini PetscFunctionReturn(0); 1650*6947451fSStefano Zampini } 1651*6947451fSStefano Zampini 1652*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1653*6947451fSStefano Zampini { 1654*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1655*6947451fSStefano Zampini PetscErrorCode ierr; 1656*6947451fSStefano Zampini 1657*6947451fSStefano Zampini PetscFunctionBegin; 1658*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1659*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1660*6947451fSStefano Zampini a->vecinuse = 0; 1661*6947451fSStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 1662*6947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 1663*6947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1664*6947451fSStefano Zampini *v = NULL; 1665*6947451fSStefano Zampini PetscFunctionReturn(0); 1666*6947451fSStefano Zampini } 1667*6947451fSStefano Zampini 1668*6947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1669*6947451fSStefano Zampini { 1670*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1671*6947451fSStefano Zampini PetscErrorCode ierr; 1672*6947451fSStefano Zampini PetscInt lda; 1673*6947451fSStefano Zampini 1674*6947451fSStefano Zampini PetscFunctionBegin; 1675*6947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1676*6947451fSStefano Zampini if (!a->cvec) { 1677*6947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 1678*6947451fSStefano Zampini } 1679*6947451fSStefano Zampini a->vecinuse = col + 1; 1680*6947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 1681*6947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1682*6947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 1683*6947451fSStefano Zampini *v = a->cvec; 1684*6947451fSStefano Zampini PetscFunctionReturn(0); 1685*6947451fSStefano Zampini } 1686*6947451fSStefano Zampini 1687*6947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1688*6947451fSStefano Zampini { 1689*6947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1690*6947451fSStefano Zampini PetscErrorCode ierr; 1691*6947451fSStefano Zampini 1692*6947451fSStefano Zampini PetscFunctionBegin; 1693*6947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 1694*6947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1695*6947451fSStefano Zampini a->vecinuse = 0; 1696*6947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 1697*6947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 1698*6947451fSStefano Zampini *v = NULL; 1699*6947451fSStefano Zampini PetscFunctionReturn(0); 1700*6947451fSStefano Zampini } 1701*6947451fSStefano Zampini 17028cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1703273d9f13SBarry Smith { 1704273d9f13SBarry Smith Mat_MPIDense *a; 1705dfbe8321SBarry Smith PetscErrorCode ierr; 1706273d9f13SBarry Smith 1707273d9f13SBarry Smith PetscFunctionBegin; 1708b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1709b0a32e0cSBarry Smith mat->data = (void*)a; 1710273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1711273d9f13SBarry Smith 1712273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1713ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1714ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1715273d9f13SBarry Smith 1716273d9f13SBarry Smith /* build cache for off array entries formed */ 1717273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 17182205254eSKarl Rupp 1719ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1720273d9f13SBarry Smith 1721273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1722273d9f13SBarry Smith a->lvec = 0; 1723273d9f13SBarry Smith a->Mvctx = 0; 1724273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1725273d9f13SBarry Smith 172649a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1727bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 17288572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 17298572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 17308572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1731*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 1732*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1733d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1734d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1735*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 1736*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 1737*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 1738*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 1739*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 1740*6947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 17418baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 17428baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 17438baccfbdSHong Zhang #endif 1744637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1745637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1746637a0070SStefano Zampini #endif 1747bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 17484222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 17494222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1750bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1751bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 175252c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 175352c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 17548949adfdSHong Zhang 17558949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 17568949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1757af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1758af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 175938aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1760273d9f13SBarry Smith PetscFunctionReturn(0); 1761273d9f13SBarry Smith } 1762273d9f13SBarry Smith 1763209238afSKris Buschelman /*MC 1764637a0070SStefano Zampini MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1765637a0070SStefano Zampini 1766637a0070SStefano Zampini Options Database Keys: 1767637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1768637a0070SStefano Zampini 1769637a0070SStefano Zampini Level: beginner 1770637a0070SStefano Zampini 1771637a0070SStefano Zampini .seealso: 1772637a0070SStefano Zampini 1773637a0070SStefano Zampini M*/ 1774637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1775637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1776637a0070SStefano Zampini { 1777637a0070SStefano Zampini PetscErrorCode ierr; 1778637a0070SStefano Zampini 1779637a0070SStefano Zampini PetscFunctionBegin; 1780637a0070SStefano Zampini ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1781637a0070SStefano Zampini ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1782637a0070SStefano Zampini PetscFunctionReturn(0); 1783637a0070SStefano Zampini } 1784637a0070SStefano Zampini #endif 1785637a0070SStefano Zampini 1786637a0070SStefano Zampini /*MC 1787002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1788209238afSKris Buschelman 1789209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1790209238afSKris Buschelman and MATMPIDENSE otherwise. 1791209238afSKris Buschelman 1792209238afSKris Buschelman Options Database Keys: 1793209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1794209238afSKris Buschelman 1795209238afSKris Buschelman Level: beginner 1796209238afSKris Buschelman 179701b82886SBarry Smith 1798*6947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1799*6947451fSStefano Zampini M*/ 1800*6947451fSStefano Zampini 1801*6947451fSStefano Zampini /*MC 1802*6947451fSStefano Zampini MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1803*6947451fSStefano Zampini 1804*6947451fSStefano Zampini This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1805*6947451fSStefano Zampini and MATMPIDENSECUDA otherwise. 1806*6947451fSStefano Zampini 1807*6947451fSStefano Zampini Options Database Keys: 1808*6947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1809*6947451fSStefano Zampini 1810*6947451fSStefano Zampini Level: beginner 1811*6947451fSStefano Zampini 1812*6947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1813209238afSKris Buschelman M*/ 1814209238afSKris Buschelman 1815273d9f13SBarry Smith /*@C 1816273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1817273d9f13SBarry Smith 1818273d9f13SBarry Smith Not collective 1819273d9f13SBarry Smith 1820273d9f13SBarry Smith Input Parameters: 18211c4f3114SJed Brown . B - the matrix 18220298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1823273d9f13SBarry Smith to control all matrix memory allocation. 1824273d9f13SBarry Smith 1825273d9f13SBarry Smith Notes: 1826273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1827273d9f13SBarry Smith storage by columns. 1828273d9f13SBarry Smith 1829273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1830273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 18310298fd71SBarry Smith set data=NULL. 1832273d9f13SBarry Smith 1833273d9f13SBarry Smith Level: intermediate 1834273d9f13SBarry Smith 1835273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1836273d9f13SBarry Smith @*/ 18371c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1838273d9f13SBarry Smith { 18394ac538c5SBarry Smith PetscErrorCode ierr; 1840273d9f13SBarry Smith 1841273d9f13SBarry Smith PetscFunctionBegin; 18421c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1843273d9f13SBarry Smith PetscFunctionReturn(0); 1844273d9f13SBarry Smith } 1845273d9f13SBarry Smith 1846d3042a70SBarry Smith /*@ 1847637a0070SStefano Zampini MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1848d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1849d3042a70SBarry Smith into a matrix 1850d3042a70SBarry Smith 1851d3042a70SBarry Smith Not Collective 1852d3042a70SBarry Smith 1853d3042a70SBarry Smith Input Parameters: 1854d3042a70SBarry Smith + mat - the matrix 1855d3042a70SBarry Smith - array - the array in column major order 1856d3042a70SBarry Smith 1857d3042a70SBarry Smith Notes: 1858d3042a70SBarry Smith You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1859d3042a70SBarry Smith freed when the matrix is destroyed. 1860d3042a70SBarry Smith 1861d3042a70SBarry Smith Level: developer 1862d3042a70SBarry Smith 1863d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1864d3042a70SBarry Smith 1865d3042a70SBarry Smith @*/ 1866637a0070SStefano Zampini PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 1867d3042a70SBarry Smith { 1868d3042a70SBarry Smith PetscErrorCode ierr; 1869637a0070SStefano Zampini 1870d3042a70SBarry Smith PetscFunctionBegin; 1871d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1872d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1873637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1874637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1875637a0070SStefano Zampini #endif 1876d3042a70SBarry Smith PetscFunctionReturn(0); 1877d3042a70SBarry Smith } 1878d3042a70SBarry Smith 1879d3042a70SBarry Smith /*@ 1880d3042a70SBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1881d3042a70SBarry Smith 1882d3042a70SBarry Smith Not Collective 1883d3042a70SBarry Smith 1884d3042a70SBarry Smith Input Parameters: 1885d3042a70SBarry Smith . mat - the matrix 1886d3042a70SBarry Smith 1887d3042a70SBarry Smith Notes: 1888d3042a70SBarry Smith You can only call this after a call to MatDensePlaceArray() 1889d3042a70SBarry Smith 1890d3042a70SBarry Smith Level: developer 1891d3042a70SBarry Smith 1892d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1893d3042a70SBarry Smith 1894d3042a70SBarry Smith @*/ 1895d3042a70SBarry Smith PetscErrorCode MatDenseResetArray(Mat mat) 1896d3042a70SBarry Smith { 1897d3042a70SBarry Smith PetscErrorCode ierr; 1898637a0070SStefano Zampini 1899d3042a70SBarry Smith PetscFunctionBegin; 1900d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1901d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1902d3042a70SBarry Smith PetscFunctionReturn(0); 1903d3042a70SBarry Smith } 1904d3042a70SBarry Smith 1905637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 19068965ea79SLois Curfman McInnes /*@C 1907637a0070SStefano Zampini MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 1908637a0070SStefano Zampini array provided by the user. This is useful to avoid copying an array 1909637a0070SStefano Zampini into a matrix 1910637a0070SStefano Zampini 1911637a0070SStefano Zampini Not Collective 1912637a0070SStefano Zampini 1913637a0070SStefano Zampini Input Parameters: 1914637a0070SStefano Zampini + mat - the matrix 1915637a0070SStefano Zampini - array - the array in column major order 1916637a0070SStefano Zampini 1917637a0070SStefano Zampini Notes: 1918637a0070SStefano Zampini You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 1919637a0070SStefano Zampini freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 1920637a0070SStefano Zampini 1921637a0070SStefano Zampini Level: developer 1922637a0070SStefano Zampini 1923637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 1924637a0070SStefano Zampini @*/ 1925637a0070SStefano Zampini PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 1926637a0070SStefano Zampini { 1927637a0070SStefano Zampini PetscErrorCode ierr; 1928637a0070SStefano Zampini 1929637a0070SStefano Zampini PetscFunctionBegin; 1930637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1931637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1932637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 1933637a0070SStefano Zampini PetscFunctionReturn(0); 1934637a0070SStefano Zampini } 1935637a0070SStefano Zampini 1936637a0070SStefano Zampini /*@C 1937637a0070SStefano Zampini MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 1938637a0070SStefano Zampini 1939637a0070SStefano Zampini Not Collective 1940637a0070SStefano Zampini 1941637a0070SStefano Zampini Input Parameters: 1942637a0070SStefano Zampini . mat - the matrix 1943637a0070SStefano Zampini 1944637a0070SStefano Zampini Notes: 1945637a0070SStefano Zampini You can only call this after a call to MatDenseCUDAPlaceArray() 1946637a0070SStefano Zampini 1947637a0070SStefano Zampini Level: developer 1948637a0070SStefano Zampini 1949637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 1950637a0070SStefano Zampini 1951637a0070SStefano Zampini @*/ 1952637a0070SStefano Zampini PetscErrorCode MatDenseCUDAResetArray(Mat mat) 1953637a0070SStefano Zampini { 1954637a0070SStefano Zampini PetscErrorCode ierr; 1955637a0070SStefano Zampini 1956637a0070SStefano Zampini PetscFunctionBegin; 1957637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1958637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1959637a0070SStefano Zampini PetscFunctionReturn(0); 1960637a0070SStefano Zampini } 1961637a0070SStefano Zampini 1962637a0070SStefano Zampini /*@C 1963637a0070SStefano Zampini MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 1964637a0070SStefano Zampini 1965637a0070SStefano Zampini Not Collective 1966637a0070SStefano Zampini 1967637a0070SStefano Zampini Input Parameters: 1968637a0070SStefano Zampini . A - the matrix 1969637a0070SStefano Zampini 1970637a0070SStefano Zampini Output Parameters 1971637a0070SStefano Zampini . array - the GPU array in column major order 1972637a0070SStefano Zampini 1973637a0070SStefano Zampini Notes: 1974637a0070SStefano Zampini The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 1975637a0070SStefano Zampini 1976637a0070SStefano Zampini Level: developer 1977637a0070SStefano Zampini 1978637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 1979637a0070SStefano Zampini @*/ 1980637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 1981637a0070SStefano Zampini { 1982637a0070SStefano Zampini PetscErrorCode ierr; 1983637a0070SStefano Zampini 1984637a0070SStefano Zampini PetscFunctionBegin; 1985637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 1986637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 1987637a0070SStefano Zampini PetscFunctionReturn(0); 1988637a0070SStefano Zampini } 1989637a0070SStefano Zampini 1990637a0070SStefano Zampini /*@C 1991637a0070SStefano Zampini MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 1992637a0070SStefano Zampini 1993637a0070SStefano Zampini Not Collective 1994637a0070SStefano Zampini 1995637a0070SStefano Zampini Input Parameters: 1996637a0070SStefano Zampini + A - the matrix 1997637a0070SStefano Zampini - array - the GPU array in column major order 1998637a0070SStefano Zampini 1999637a0070SStefano Zampini Notes: 2000637a0070SStefano Zampini 2001637a0070SStefano Zampini Level: developer 2002637a0070SStefano Zampini 2003637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2004637a0070SStefano Zampini @*/ 2005637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2006637a0070SStefano Zampini { 2007637a0070SStefano Zampini PetscErrorCode ierr; 2008637a0070SStefano Zampini 2009637a0070SStefano Zampini PetscFunctionBegin; 2010637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2011637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2012637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2013637a0070SStefano Zampini PetscFunctionReturn(0); 2014637a0070SStefano Zampini } 2015637a0070SStefano Zampini 2016637a0070SStefano Zampini /*@C 2017637a0070SStefano Zampini MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2018637a0070SStefano Zampini 2019637a0070SStefano Zampini Not Collective 2020637a0070SStefano Zampini 2021637a0070SStefano Zampini Input Parameters: 2022637a0070SStefano Zampini . A - the matrix 2023637a0070SStefano Zampini 2024637a0070SStefano Zampini Output Parameters 2025637a0070SStefano Zampini . array - the GPU array in column major order 2026637a0070SStefano Zampini 2027637a0070SStefano Zampini Notes: 2028637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2029637a0070SStefano Zampini 2030637a0070SStefano Zampini Level: developer 2031637a0070SStefano Zampini 2032637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2033637a0070SStefano Zampini @*/ 2034637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2035637a0070SStefano Zampini { 2036637a0070SStefano Zampini PetscErrorCode ierr; 2037637a0070SStefano Zampini 2038637a0070SStefano Zampini PetscFunctionBegin; 2039637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2040637a0070SStefano Zampini PetscFunctionReturn(0); 2041637a0070SStefano Zampini } 2042637a0070SStefano Zampini 2043637a0070SStefano Zampini /*@C 2044637a0070SStefano Zampini MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2045637a0070SStefano Zampini 2046637a0070SStefano Zampini Not Collective 2047637a0070SStefano Zampini 2048637a0070SStefano Zampini Input Parameters: 2049637a0070SStefano Zampini + A - the matrix 2050637a0070SStefano Zampini - array - the GPU array in column major order 2051637a0070SStefano Zampini 2052637a0070SStefano Zampini Notes: 2053637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2054637a0070SStefano Zampini 2055637a0070SStefano Zampini Level: developer 2056637a0070SStefano Zampini 2057637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2058637a0070SStefano Zampini @*/ 2059637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2060637a0070SStefano Zampini { 2061637a0070SStefano Zampini PetscErrorCode ierr; 2062637a0070SStefano Zampini 2063637a0070SStefano Zampini PetscFunctionBegin; 2064637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2065637a0070SStefano Zampini PetscFunctionReturn(0); 2066637a0070SStefano Zampini } 2067637a0070SStefano Zampini 2068637a0070SStefano Zampini /*@C 2069637a0070SStefano Zampini MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2070637a0070SStefano Zampini 2071637a0070SStefano Zampini Not Collective 2072637a0070SStefano Zampini 2073637a0070SStefano Zampini Input Parameters: 2074637a0070SStefano Zampini . A - the matrix 2075637a0070SStefano Zampini 2076637a0070SStefano Zampini Output Parameters 2077637a0070SStefano Zampini . array - the GPU array in column major order 2078637a0070SStefano Zampini 2079637a0070SStefano Zampini Notes: 2080637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2081637a0070SStefano Zampini 2082637a0070SStefano Zampini Level: developer 2083637a0070SStefano Zampini 2084637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2085637a0070SStefano Zampini @*/ 2086637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2087637a0070SStefano Zampini { 2088637a0070SStefano Zampini PetscErrorCode ierr; 2089637a0070SStefano Zampini 2090637a0070SStefano Zampini PetscFunctionBegin; 2091637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2092637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2093637a0070SStefano Zampini PetscFunctionReturn(0); 2094637a0070SStefano Zampini } 2095637a0070SStefano Zampini 2096637a0070SStefano Zampini /*@C 2097637a0070SStefano Zampini MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2098637a0070SStefano Zampini 2099637a0070SStefano Zampini Not Collective 2100637a0070SStefano Zampini 2101637a0070SStefano Zampini Input Parameters: 2102637a0070SStefano Zampini + A - the matrix 2103637a0070SStefano Zampini - array - the GPU array in column major order 2104637a0070SStefano Zampini 2105637a0070SStefano Zampini Notes: 2106637a0070SStefano Zampini 2107637a0070SStefano Zampini Level: developer 2108637a0070SStefano Zampini 2109637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2110637a0070SStefano Zampini @*/ 2111637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2112637a0070SStefano Zampini { 2113637a0070SStefano Zampini PetscErrorCode ierr; 2114637a0070SStefano Zampini 2115637a0070SStefano Zampini PetscFunctionBegin; 2116637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2117637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2118637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2119637a0070SStefano Zampini PetscFunctionReturn(0); 2120637a0070SStefano Zampini } 2121637a0070SStefano Zampini #endif 2122637a0070SStefano Zampini 2123637a0070SStefano Zampini /*@C 2124637a0070SStefano Zampini MatCreateDense - Creates a matrix in dense format. 21258965ea79SLois Curfman McInnes 2126d083f849SBarry Smith Collective 2127db81eaa0SLois Curfman McInnes 21288965ea79SLois Curfman McInnes Input Parameters: 2129db81eaa0SLois Curfman McInnes + comm - MPI communicator 21308965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2131db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 21328965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2133db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 21346cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2135dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 21368965ea79SLois Curfman McInnes 21378965ea79SLois Curfman McInnes Output Parameter: 2138477f1c0bSLois Curfman McInnes . A - the matrix 21398965ea79SLois Curfman McInnes 2140b259b22eSLois Curfman McInnes Notes: 214139ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 214239ddd567SLois Curfman McInnes storage by columns. 21438965ea79SLois Curfman McInnes 214418f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 214518f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 21466cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 214718f449edSLois Curfman McInnes 21488965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 21498965ea79SLois Curfman McInnes (possibly both). 21508965ea79SLois Curfman McInnes 2151027ccd11SLois Curfman McInnes Level: intermediate 2152027ccd11SLois Curfman McInnes 215339ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 21548965ea79SLois Curfman McInnes @*/ 215569b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 21568965ea79SLois Curfman McInnes { 21576849ba73SBarry Smith PetscErrorCode ierr; 215813f74950SBarry Smith PetscMPIInt size; 21598965ea79SLois Curfman McInnes 21603a40ed3dSBarry Smith PetscFunctionBegin; 2161f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 21628491ab44SLisandro Dalcin PetscValidLogicalCollectiveBool(*A,!!data,6); 2163f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2164273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2165273d9f13SBarry Smith if (size > 1) { 2166273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2167273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 21686cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 21696cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 21706cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 21716cfe35ddSJose E. Roman } 2172273d9f13SBarry Smith } else { 2173273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2174273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 21758c469469SLois Curfman McInnes } 21763a40ed3dSBarry Smith PetscFunctionReturn(0); 21778965ea79SLois Curfman McInnes } 21788965ea79SLois Curfman McInnes 2179637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2180637a0070SStefano Zampini /*@C 2181637a0070SStefano Zampini MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2182637a0070SStefano Zampini 2183637a0070SStefano Zampini Collective 2184637a0070SStefano Zampini 2185637a0070SStefano Zampini Input Parameters: 2186637a0070SStefano Zampini + comm - MPI communicator 2187637a0070SStefano Zampini . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2188637a0070SStefano Zampini . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2189637a0070SStefano Zampini . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2190637a0070SStefano Zampini . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2191637a0070SStefano Zampini - data - optional location of GPU matrix data. Set data=NULL for PETSc 2192637a0070SStefano Zampini to control matrix memory allocation. 2193637a0070SStefano Zampini 2194637a0070SStefano Zampini Output Parameter: 2195637a0070SStefano Zampini . A - the matrix 2196637a0070SStefano Zampini 2197637a0070SStefano Zampini Notes: 2198637a0070SStefano Zampini 2199637a0070SStefano Zampini Level: intermediate 2200637a0070SStefano Zampini 2201637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense() 2202637a0070SStefano Zampini @*/ 2203637a0070SStefano Zampini PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2204637a0070SStefano Zampini { 2205637a0070SStefano Zampini PetscErrorCode ierr; 2206637a0070SStefano Zampini PetscMPIInt size; 2207637a0070SStefano Zampini 2208637a0070SStefano Zampini PetscFunctionBegin; 2209637a0070SStefano Zampini ierr = MatCreate(comm,A);CHKERRQ(ierr); 2210637a0070SStefano Zampini PetscValidLogicalCollectiveBool(*A,!!data,6); 2211637a0070SStefano Zampini ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2212637a0070SStefano Zampini ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2213637a0070SStefano Zampini if (size > 1) { 2214637a0070SStefano Zampini ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2215637a0070SStefano Zampini ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2216637a0070SStefano Zampini if (data) { /* user provided data array, so no need to assemble */ 2217637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2218637a0070SStefano Zampini (*A)->assembled = PETSC_TRUE; 2219637a0070SStefano Zampini } 2220637a0070SStefano Zampini } else { 2221637a0070SStefano Zampini ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2222637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2223637a0070SStefano Zampini } 2224637a0070SStefano Zampini PetscFunctionReturn(0); 2225637a0070SStefano Zampini } 2226637a0070SStefano Zampini #endif 2227637a0070SStefano Zampini 22286849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 22298965ea79SLois Curfman McInnes { 22308965ea79SLois Curfman McInnes Mat mat; 22313501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2232dfbe8321SBarry Smith PetscErrorCode ierr; 22338965ea79SLois Curfman McInnes 22343a40ed3dSBarry Smith PetscFunctionBegin; 22358965ea79SLois Curfman McInnes *newmat = 0; 2236ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2237d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 22387adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2239834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 22405aa7edbeSHong Zhang 2241d5f3da31SBarry Smith mat->factortype = A->factortype; 2242c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2243273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 22448965ea79SLois Curfman McInnes 22458965ea79SLois Curfman McInnes a->size = oldmat->size; 22468965ea79SLois Curfman McInnes a->rank = oldmat->rank; 2247e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 22483782ba37SSatish Balay a->donotstash = oldmat->donotstash; 2249e04c1aa4SHong Zhang 22501e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 22511e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 22528965ea79SLois Curfman McInnes 22535609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 22543bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2255637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 225601b82886SBarry Smith 22578965ea79SLois Curfman McInnes *newmat = mat; 22583a40ed3dSBarry Smith PetscFunctionReturn(0); 22598965ea79SLois Curfman McInnes } 22608965ea79SLois Curfman McInnes 2261eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2262eb91f321SVaclav Hapla { 2263eb91f321SVaclav Hapla PetscErrorCode ierr; 226487d5ce66SSatish Balay PetscBool isbinary; 226587d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 226687d5ce66SSatish Balay PetscBool ishdf5; 226787d5ce66SSatish Balay #endif 2268eb91f321SVaclav Hapla 2269eb91f321SVaclav Hapla PetscFunctionBegin; 2270eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2271eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2272eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 2273eb91f321SVaclav Hapla ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2274eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 227587d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 2276eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 227787d5ce66SSatish Balay #endif 2278eb91f321SVaclav Hapla if (isbinary) { 22798491ab44SLisandro Dalcin ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2280eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 228187d5ce66SSatish Balay } else if (ishdf5) { 2282eb91f321SVaclav Hapla ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2283eb91f321SVaclav Hapla #endif 228487d5ce66SSatish Balay } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2285eb91f321SVaclav Hapla PetscFunctionReturn(0); 2286eb91f321SVaclav Hapla } 2287eb91f321SVaclav Hapla 2288ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 22896e4ee0c6SHong Zhang { 22906e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 22916e4ee0c6SHong Zhang Mat a,b; 2292ace3abfcSBarry Smith PetscBool flg; 22936e4ee0c6SHong Zhang PetscErrorCode ierr; 229490ace30eSBarry Smith 22956e4ee0c6SHong Zhang PetscFunctionBegin; 22966e4ee0c6SHong Zhang a = matA->A; 22976e4ee0c6SHong Zhang b = matB->A; 22986e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2299b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 23006e4ee0c6SHong Zhang PetscFunctionReturn(0); 23016e4ee0c6SHong Zhang } 230290ace30eSBarry Smith 2303baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 2304baa3c1c6SHong Zhang { 2305baa3c1c6SHong Zhang PetscErrorCode ierr; 2306baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2307baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = a->atbdense; 2308baa3c1c6SHong Zhang 2309baa3c1c6SHong Zhang PetscFunctionBegin; 2310637a0070SStefano Zampini ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2311637a0070SStefano Zampini ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2312637a0070SStefano Zampini ierr = (*atb->destroy)(A);CHKERRQ(ierr); 2313baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 2314baa3c1c6SHong Zhang PetscFunctionReturn(0); 2315baa3c1c6SHong Zhang } 2316baa3c1c6SHong Zhang 2317cc48ffa7SToby Isaac PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 2318cc48ffa7SToby Isaac { 2319cc48ffa7SToby Isaac PetscErrorCode ierr; 2320cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2321cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = a->abtdense; 2322cc48ffa7SToby Isaac 2323cc48ffa7SToby Isaac PetscFunctionBegin; 2324cc48ffa7SToby Isaac ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2325faa55883SToby Isaac ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2326cc48ffa7SToby Isaac ierr = (abt->destroy)(A);CHKERRQ(ierr); 2327cc48ffa7SToby Isaac ierr = PetscFree(abt);CHKERRQ(ierr); 2328cc48ffa7SToby Isaac PetscFunctionReturn(0); 2329cc48ffa7SToby Isaac } 2330cc48ffa7SToby Isaac 2331cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2332cb20be35SHong Zhang { 2333baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2334baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = c->atbdense; 2335cb20be35SHong Zhang PetscErrorCode ierr; 2336cb20be35SHong Zhang MPI_Comm comm; 2337637a0070SStefano Zampini PetscMPIInt size,*recvcounts=atb->recvcounts; 2338637a0070SStefano Zampini PetscScalar *carray,*sendbuf=atb->sendbuf; 2339637a0070SStefano Zampini const PetscScalar *atbarray; 2340d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2341e68c0b26SHong Zhang const PetscInt *ranges; 2342cb20be35SHong Zhang 2343cb20be35SHong Zhang PetscFunctionBegin; 2344cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2345cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2346e68c0b26SHong Zhang 2347c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 2348637a0070SStefano Zampini ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2349cb20be35SHong Zhang 2350cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2351c5ef1628SHong Zhang for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2352cb20be35SHong Zhang 2353660d5466SHong Zhang /* arrange atbarray into sendbuf */ 2354637a0070SStefano Zampini ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2355637a0070SStefano Zampini for (proc=0, k=0; proc<size; proc++) { 2356baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 2357c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2358cb20be35SHong Zhang } 2359cb20be35SHong Zhang } 2360637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2361637a0070SStefano Zampini 2362c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 2363660d5466SHong Zhang ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 23643462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2365660d5466SHong Zhang ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 2366cb20be35SHong Zhang PetscFunctionReturn(0); 2367cb20be35SHong Zhang } 2368cb20be35SHong Zhang 23694222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2370cb20be35SHong Zhang { 2371cb20be35SHong Zhang PetscErrorCode ierr; 2372cb20be35SHong Zhang MPI_Comm comm; 2373baa3c1c6SHong Zhang PetscMPIInt size; 2374660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2375baa3c1c6SHong Zhang Mat_MPIDense *c; 2376baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 2377cb20be35SHong Zhang 2378cb20be35SHong Zhang PetscFunctionBegin; 2379baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2380cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2381cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2382cb20be35SHong Zhang } 2383cb20be35SHong Zhang 23844222ddf1SHong Zhang /* create matrix product C */ 23854222ddf1SHong Zhang ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 23864222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 238718992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 23884222ddf1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 23894222ddf1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2390baa3c1c6SHong Zhang 23914222ddf1SHong Zhang /* create data structure for reuse C */ 2392baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2393baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 23944222ddf1SHong Zhang cM = C->rmap->N; 2395637a0070SStefano Zampini ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2396baa3c1c6SHong Zhang 23974222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2398baa3c1c6SHong Zhang c->atbdense = atb; 23994222ddf1SHong Zhang atb->destroy = C->ops->destroy; 24004222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2401cb20be35SHong Zhang PetscFunctionReturn(0); 2402cb20be35SHong Zhang } 2403cb20be35SHong Zhang 24044222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2405cb20be35SHong Zhang { 2406cb20be35SHong Zhang PetscErrorCode ierr; 2407cc48ffa7SToby Isaac MPI_Comm comm; 2408cc48ffa7SToby Isaac PetscMPIInt i, size; 2409cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2410cc48ffa7SToby Isaac Mat_MPIDense *c; 2411cc48ffa7SToby Isaac PetscMPIInt tag; 24124222ddf1SHong Zhang PetscInt alg; 2413cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 24144222ddf1SHong Zhang Mat_Product *product = C->product; 24154222ddf1SHong Zhang PetscBool flg; 2416cc48ffa7SToby Isaac 2417cc48ffa7SToby Isaac PetscFunctionBegin; 24184222ddf1SHong Zhang /* check local size of A and B */ 2419637a0070SStefano Zampini if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2420cc48ffa7SToby Isaac 24214222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2422637a0070SStefano Zampini alg = flg ? 0 : 1; 2423cc48ffa7SToby Isaac 24244222ddf1SHong Zhang /* setup matrix product C */ 24254222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 24264222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 242718992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 24284222ddf1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr); 24294222ddf1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 24304222ddf1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 24314222ddf1SHong Zhang 24324222ddf1SHong Zhang /* create data structure for reuse C */ 24334222ddf1SHong Zhang ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2434cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2435cc48ffa7SToby Isaac ierr = PetscNew(&abt);CHKERRQ(ierr); 2436cc48ffa7SToby Isaac abt->tag = tag; 2437faa55883SToby Isaac abt->alg = alg; 2438faa55883SToby Isaac switch (alg) { 24394222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2440cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2441cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2442cc48ffa7SToby Isaac ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2443faa55883SToby Isaac break; 24444222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2445faa55883SToby Isaac ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2446faa55883SToby Isaac ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2447faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2448faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2449faa55883SToby Isaac break; 2450faa55883SToby Isaac } 2451cc48ffa7SToby Isaac 24524222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2453cc48ffa7SToby Isaac c->abtdense = abt; 24544222ddf1SHong Zhang abt->destroy = C->ops->destroy; 24554222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 24564222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2457cc48ffa7SToby Isaac PetscFunctionReturn(0); 2458cc48ffa7SToby Isaac } 2459cc48ffa7SToby Isaac 2460faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2461cc48ffa7SToby Isaac { 2462cc48ffa7SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2463cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2464cc48ffa7SToby Isaac PetscErrorCode ierr; 2465cc48ffa7SToby Isaac MPI_Comm comm; 2466cc48ffa7SToby Isaac PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2467637a0070SStefano Zampini PetscScalar *sendbuf, *recvbuf=0, *cv; 2468cc48ffa7SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2469cc48ffa7SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2470637a0070SStefano Zampini const PetscScalar *av,*bv; 2471637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, blda, clda; 2472cc48ffa7SToby Isaac MPI_Request reqs[2]; 2473cc48ffa7SToby Isaac const PetscInt *ranges; 2474cc48ffa7SToby Isaac 2475cc48ffa7SToby Isaac PetscFunctionBegin; 2476cc48ffa7SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2477cc48ffa7SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2478cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2479637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2480637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2481637a0070SStefano Zampini ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2482637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2483637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2484637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2485637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2486637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2487637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2488cc48ffa7SToby Isaac ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2489cc48ffa7SToby Isaac bn = B->rmap->n; 2490637a0070SStefano Zampini if (blda == bn) { 2491637a0070SStefano Zampini sendbuf = (PetscScalar*)bv; 2492cc48ffa7SToby Isaac } else { 2493cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2494cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2495cc48ffa7SToby Isaac for (j = 0; j < bn; j++, k++) { 2496637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2497cc48ffa7SToby Isaac } 2498cc48ffa7SToby Isaac } 2499cc48ffa7SToby Isaac } 2500cc48ffa7SToby Isaac if (size > 1) { 2501cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2502cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2503cc48ffa7SToby Isaac } else { 2504cc48ffa7SToby Isaac sendto = recvfrom = 0; 2505cc48ffa7SToby Isaac } 2506cc48ffa7SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2507cc48ffa7SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2508cc48ffa7SToby Isaac recvisfrom = rank; 2509cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2510cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2511cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2512cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2513cc48ffa7SToby Isaac 2514cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2515cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2516cc48ffa7SToby Isaac sendsiz = cK * bn; 2517cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2518cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2519cc48ffa7SToby Isaac ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2520cc48ffa7SToby Isaac ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2521cc48ffa7SToby Isaac } 2522cc48ffa7SToby Isaac 2523cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 2524cc48ffa7SToby Isaac ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2525637a0070SStefano Zampini PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2526cc48ffa7SToby Isaac 2527cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2528cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2529cc48ffa7SToby Isaac ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2530cc48ffa7SToby Isaac } 2531cc48ffa7SToby Isaac bn = nextbn; 2532cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2533cc48ffa7SToby Isaac sendbuf = recvbuf; 2534cc48ffa7SToby Isaac } 2535637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2536637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2537637a0070SStefano Zampini ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2538cc48ffa7SToby Isaac PetscFunctionReturn(0); 2539cc48ffa7SToby Isaac } 2540cc48ffa7SToby Isaac 2541faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2542faa55883SToby Isaac { 2543faa55883SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2544faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2545faa55883SToby Isaac PetscErrorCode ierr; 2546faa55883SToby Isaac MPI_Comm comm; 2547637a0070SStefano Zampini PetscMPIInt size; 2548637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2549637a0070SStefano Zampini const PetscScalar *av,*bv; 2550637a0070SStefano Zampini PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2551faa55883SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2552637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2553faa55883SToby Isaac 2554faa55883SToby Isaac PetscFunctionBegin; 2555faa55883SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2556faa55883SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2557637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2558637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2559637a0070SStefano Zampini ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2560637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2561637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2562637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2563637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2564637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2565faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2566faa55883SToby Isaac bn = B->rmap->n; 2567faa55883SToby Isaac sendbuf = abt->buf[0]; 2568faa55883SToby Isaac recvbuf = abt->buf[1]; 2569faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2570faa55883SToby Isaac for (i = 0; i < cK; i++, k++) { 2571637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2572faa55883SToby Isaac } 2573faa55883SToby Isaac } 2574637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2575faa55883SToby Isaac ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2576faa55883SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2577faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2578faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2579637a0070SStefano Zampini PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2580637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2581637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2582637a0070SStefano Zampini ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2583faa55883SToby Isaac PetscFunctionReturn(0); 2584faa55883SToby Isaac } 2585faa55883SToby Isaac 2586faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2587faa55883SToby Isaac { 2588faa55883SToby Isaac Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2589faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2590faa55883SToby Isaac PetscErrorCode ierr; 2591faa55883SToby Isaac 2592faa55883SToby Isaac PetscFunctionBegin; 2593faa55883SToby Isaac switch (abt->alg) { 2594faa55883SToby Isaac case 1: 2595faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2596faa55883SToby Isaac break; 2597faa55883SToby Isaac default: 2598faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2599faa55883SToby Isaac break; 2600faa55883SToby Isaac } 2601faa55883SToby Isaac PetscFunctionReturn(0); 2602faa55883SToby Isaac } 2603faa55883SToby Isaac 2604320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2605320f2790SHong Zhang { 2606320f2790SHong Zhang PetscErrorCode ierr; 2607320f2790SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2608320f2790SHong Zhang Mat_MatMultDense *ab = a->abdense; 2609320f2790SHong Zhang 2610320f2790SHong Zhang PetscFunctionBegin; 2611320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2612320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2613320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2614320f2790SHong Zhang 2615320f2790SHong Zhang ierr = (ab->destroy)(A);CHKERRQ(ierr); 2616320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 2617320f2790SHong Zhang PetscFunctionReturn(0); 2618320f2790SHong Zhang } 2619320f2790SHong Zhang 26205242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 2621320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2622320f2790SHong Zhang { 2623320f2790SHong Zhang PetscErrorCode ierr; 2624320f2790SHong Zhang Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2625320f2790SHong Zhang Mat_MatMultDense *ab=c->abdense; 2626320f2790SHong Zhang 2627320f2790SHong Zhang PetscFunctionBegin; 2628de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2629de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 26304222ddf1SHong Zhang ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2631de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2632320f2790SHong Zhang PetscFunctionReturn(0); 2633320f2790SHong Zhang } 2634320f2790SHong Zhang 26354222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2636320f2790SHong Zhang { 2637320f2790SHong Zhang PetscErrorCode ierr; 2638320f2790SHong Zhang Mat Ae,Be,Ce; 2639320f2790SHong Zhang Mat_MPIDense *c; 2640320f2790SHong Zhang Mat_MatMultDense *ab; 2641320f2790SHong Zhang 2642320f2790SHong Zhang PetscFunctionBegin; 26434222ddf1SHong Zhang /* check local size of A and B */ 2644320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2645378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2646320f2790SHong Zhang } 2647320f2790SHong Zhang 26484222ddf1SHong Zhang /* create elemental matrices Ae and Be */ 26494222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 26504222ddf1SHong Zhang ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 26514222ddf1SHong Zhang ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 26524222ddf1SHong Zhang ierr = MatSetUp(Ae);CHKERRQ(ierr); 26534222ddf1SHong Zhang ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2654320f2790SHong Zhang 26554222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 26564222ddf1SHong Zhang ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 26574222ddf1SHong Zhang ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 26584222ddf1SHong Zhang ierr = MatSetUp(Be);CHKERRQ(ierr); 26594222ddf1SHong Zhang ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2660320f2790SHong Zhang 26614222ddf1SHong Zhang /* compute symbolic Ce = Ae*Be */ 26624222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 26634222ddf1SHong Zhang ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 26644222ddf1SHong Zhang 26654222ddf1SHong Zhang /* setup C */ 26664222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 26674222ddf1SHong Zhang ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 26684222ddf1SHong Zhang ierr = MatSetUp(C);CHKERRQ(ierr); 2669320f2790SHong Zhang 2670320f2790SHong Zhang /* create data structure for reuse Cdense */ 2671320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 26724222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2673320f2790SHong Zhang c->abdense = ab; 2674320f2790SHong Zhang 2675320f2790SHong Zhang ab->Ae = Ae; 2676320f2790SHong Zhang ab->Be = Be; 2677320f2790SHong Zhang ab->Ce = Ce; 26784222ddf1SHong Zhang ab->destroy = C->ops->destroy; 26794222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 26804222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 26814222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AB; 2682320f2790SHong Zhang PetscFunctionReturn(0); 2683320f2790SHong Zhang } 26844222ddf1SHong Zhang #endif 26854222ddf1SHong Zhang /* ----------------------------------------------- */ 26864222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 26874222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2688320f2790SHong Zhang { 2689320f2790SHong Zhang PetscFunctionBegin; 26904222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 26914222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 26924222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AB; 2693320f2790SHong Zhang PetscFunctionReturn(0); 2694320f2790SHong Zhang } 26955242a7b1SHong Zhang #endif 269686aefd0dSHong Zhang 26974222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 26984222ddf1SHong Zhang { 26994222ddf1SHong Zhang PetscErrorCode ierr; 27004222ddf1SHong Zhang Mat_Product *product = C->product; 27014222ddf1SHong Zhang 27024222ddf1SHong Zhang PetscFunctionBegin; 27034222ddf1SHong Zhang ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 27044222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB; 27054222ddf1SHong Zhang C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 27064222ddf1SHong Zhang PetscFunctionReturn(0); 27074222ddf1SHong Zhang } 27084222ddf1SHong Zhang 27094222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 27104222ddf1SHong Zhang { 27114222ddf1SHong Zhang Mat_Product *product = C->product; 27124222ddf1SHong Zhang Mat A = product->A,B=product->B; 27134222ddf1SHong Zhang 27144222ddf1SHong Zhang PetscFunctionBegin; 27154222ddf1SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 27164222ddf1SHong Zhang SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 27174222ddf1SHong Zhang 27184222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 27194222ddf1SHong Zhang PetscFunctionReturn(0); 27204222ddf1SHong Zhang } 27214222ddf1SHong Zhang 27224222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 27234222ddf1SHong Zhang { 27244222ddf1SHong Zhang PetscErrorCode ierr; 27254222ddf1SHong Zhang Mat_Product *product = C->product; 27264222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv","cyclic"}; 27274222ddf1SHong Zhang PetscInt alg,nalg = 2; 27284222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 27294222ddf1SHong Zhang 27304222ddf1SHong Zhang PetscFunctionBegin; 27314222ddf1SHong Zhang /* Set default algorithm */ 27324222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 27334222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 27344222ddf1SHong Zhang if (flg) { 27354222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 27364222ddf1SHong Zhang } 27374222ddf1SHong Zhang 27384222ddf1SHong Zhang /* Get runtime option */ 27394222ddf1SHong Zhang if (product->api_user) { 27404222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 27414222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 27424222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 27434222ddf1SHong Zhang } else { 27444222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 27454222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 27464222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 27474222ddf1SHong Zhang } 27484222ddf1SHong Zhang if (flg) { 27494222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 27504222ddf1SHong Zhang } 27514222ddf1SHong Zhang 27524222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 27534222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 27544222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_ABt; 27554222ddf1SHong Zhang PetscFunctionReturn(0); 27564222ddf1SHong Zhang } 27574222ddf1SHong Zhang 27584222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 27594222ddf1SHong Zhang { 27604222ddf1SHong Zhang PetscErrorCode ierr; 27614222ddf1SHong Zhang Mat_Product *product = C->product; 27624222ddf1SHong Zhang 27634222ddf1SHong Zhang PetscFunctionBegin; 27644222ddf1SHong Zhang switch (product->type) { 27654222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 27664222ddf1SHong Zhang case MATPRODUCT_AB: 27674222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 27684222ddf1SHong Zhang break; 27694222ddf1SHong Zhang #endif 27704222ddf1SHong Zhang case MATPRODUCT_AtB: 27714222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 27724222ddf1SHong Zhang break; 27734222ddf1SHong Zhang case MATPRODUCT_ABt: 27744222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 27754222ddf1SHong Zhang break; 2776544a5e07SHong Zhang default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 27774222ddf1SHong Zhang } 27784222ddf1SHong Zhang PetscFunctionReturn(0); 27794222ddf1SHong Zhang } 2780