1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 6c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 78949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 8baa3c1c6SHong Zhang #include <petscblaslapack.h> 98965ea79SLois Curfman McInnes 10ab92ecdeSBarry Smith /*@ 11ab92ecdeSBarry Smith 12ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 14ab92ecdeSBarry Smith 15ab92ecdeSBarry Smith Input Parameter: 16ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 17ab92ecdeSBarry Smith 18ab92ecdeSBarry Smith Output Parameter: 19ab92ecdeSBarry Smith . B - the inner matrix 20ab92ecdeSBarry Smith 218e6c10adSSatish Balay Level: intermediate 228e6c10adSSatish Balay 23ab92ecdeSBarry Smith @*/ 24ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25ab92ecdeSBarry Smith { 26ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27ab92ecdeSBarry Smith PetscErrorCode ierr; 28ace3abfcSBarry Smith PetscBool flg; 29ab92ecdeSBarry Smith 30ab92ecdeSBarry Smith PetscFunctionBegin; 31d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32d5ea218eSStefano Zampini PetscValidPointer(B,2); 33251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 342205254eSKarl Rupp if (flg) *B = mat->A; 352205254eSKarl Rupp else *B = A; 36ab92ecdeSBarry Smith PetscFunctionReturn(0); 37ab92ecdeSBarry Smith } 38ab92ecdeSBarry Smith 39ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40ba8c8a56SBarry Smith { 41ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42ba8c8a56SBarry Smith PetscErrorCode ierr; 43d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44ba8c8a56SBarry Smith 45ba8c8a56SBarry Smith PetscFunctionBegin; 46e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47ba8c8a56SBarry Smith lrow = row - rstart; 48ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49ba8c8a56SBarry Smith PetscFunctionReturn(0); 50ba8c8a56SBarry Smith } 51ba8c8a56SBarry Smith 52637a0070SStefano Zampini PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53ba8c8a56SBarry Smith { 54637a0070SStefano Zampini Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55ba8c8a56SBarry Smith PetscErrorCode ierr; 56637a0070SStefano Zampini PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57ba8c8a56SBarry Smith 58ba8c8a56SBarry Smith PetscFunctionBegin; 59637a0070SStefano Zampini if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60637a0070SStefano Zampini lrow = row - rstart; 61637a0070SStefano Zampini ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62ba8c8a56SBarry Smith PetscFunctionReturn(0); 63ba8c8a56SBarry Smith } 64ba8c8a56SBarry Smith 6511bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 660de54da6SSatish Balay { 670de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 686849ba73SBarry Smith PetscErrorCode ierr; 69d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 7087828ca2SBarry Smith PetscScalar *array; 710de54da6SSatish Balay MPI_Comm comm; 72*4b6ae80fSStefano Zampini PetscBool flg; 7311bd1e4dSLisandro Dalcin Mat B; 740de54da6SSatish Balay 750de54da6SSatish Balay PetscFunctionBegin; 76*4b6ae80fSStefano Zampini ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 77*4b6ae80fSStefano Zampini if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 7811bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79*4b6ae80fSStefano Zampini if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */ 80*4b6ae80fSStefano Zampini 81*4b6ae80fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 82*4b6ae80fSStefano Zampini if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 830de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 8411bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 8611bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87*4b6ae80fSStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 8811bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89*4b6ae80fSStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 9011bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9111bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9211bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 9311bd1e4dSLisandro Dalcin *a = B; 9411bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 952205254eSKarl Rupp } else *a = B; 960de54da6SSatish Balay PetscFunctionReturn(0); 970de54da6SSatish Balay } 980de54da6SSatish Balay 9913f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 1008965ea79SLois Curfman McInnes { 10139b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102dfbe8321SBarry Smith PetscErrorCode ierr; 103d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 1058965ea79SLois Curfman McInnes 1063a40ed3dSBarry Smith PetscFunctionBegin; 1078965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1085ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 109e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1108965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1118965ea79SLois Curfman McInnes row = idxm[i] - rstart; 11239b7565bSBarry Smith if (roworiented) { 11339b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1143a40ed3dSBarry Smith } else { 1158965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1165ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 117e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11839b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11939b7565bSBarry Smith } 1208965ea79SLois Curfman McInnes } 1212205254eSKarl Rupp } else if (!A->donotstash) { 1225080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 12339b7565bSBarry Smith if (roworiented) { 124b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125d36fbae8SSatish Balay } else { 126b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 12739b7565bSBarry Smith } 128b49de8d1SLois Curfman McInnes } 129b49de8d1SLois Curfman McInnes } 1303a40ed3dSBarry Smith PetscFunctionReturn(0); 131b49de8d1SLois Curfman McInnes } 132b49de8d1SLois Curfman McInnes 13313f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134b49de8d1SLois Curfman McInnes { 135b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136dfbe8321SBarry Smith PetscErrorCode ierr; 137d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138b49de8d1SLois Curfman McInnes 1393a40ed3dSBarry Smith PetscFunctionBegin; 140b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 141e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 144b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 145b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 146e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149b49de8d1SLois Curfman McInnes } 150e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1518965ea79SLois Curfman McInnes } 1523a40ed3dSBarry Smith PetscFunctionReturn(0); 1538965ea79SLois Curfman McInnes } 1548965ea79SLois Curfman McInnes 15549a6ff4bSBarry Smith static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 15649a6ff4bSBarry Smith { 15749a6ff4bSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 15849a6ff4bSBarry Smith PetscErrorCode ierr; 15949a6ff4bSBarry Smith 16049a6ff4bSBarry Smith PetscFunctionBegin; 16149a6ff4bSBarry Smith ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 16249a6ff4bSBarry Smith PetscFunctionReturn(0); 16349a6ff4bSBarry Smith } 16449a6ff4bSBarry Smith 165637a0070SStefano Zampini static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 166ff14e315SSatish Balay { 167ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168dfbe8321SBarry Smith PetscErrorCode ierr; 169ff14e315SSatish Balay 1703a40ed3dSBarry Smith PetscFunctionBegin; 1718c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1723a40ed3dSBarry Smith PetscFunctionReturn(0); 173ff14e315SSatish Balay } 174ff14e315SSatish Balay 175637a0070SStefano Zampini static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 1768572280aSBarry Smith { 1778572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1788572280aSBarry Smith PetscErrorCode ierr; 1798572280aSBarry Smith 1808572280aSBarry Smith PetscFunctionBegin; 1818572280aSBarry Smith ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 1828572280aSBarry Smith PetscFunctionReturn(0); 1838572280aSBarry Smith } 1848572280aSBarry Smith 1856947451fSStefano Zampini static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 1866947451fSStefano Zampini { 1876947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1886947451fSStefano Zampini PetscErrorCode ierr; 1896947451fSStefano Zampini 1906947451fSStefano Zampini PetscFunctionBegin; 1916947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 1926947451fSStefano Zampini PetscFunctionReturn(0); 1936947451fSStefano Zampini } 1946947451fSStefano Zampini 195637a0070SStefano Zampini static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 196d3042a70SBarry Smith { 197d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 198d3042a70SBarry Smith PetscErrorCode ierr; 199d3042a70SBarry Smith 200d3042a70SBarry Smith PetscFunctionBegin; 2016947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 202d3042a70SBarry Smith ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 203d3042a70SBarry Smith PetscFunctionReturn(0); 204d3042a70SBarry Smith } 205d3042a70SBarry Smith 206d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 207d3042a70SBarry Smith { 208d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 209d3042a70SBarry Smith PetscErrorCode ierr; 210d3042a70SBarry Smith 211d3042a70SBarry Smith PetscFunctionBegin; 2126947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 213d3042a70SBarry Smith ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 214d3042a70SBarry Smith PetscFunctionReturn(0); 215d3042a70SBarry Smith } 216d3042a70SBarry Smith 217d5ea218eSStefano Zampini static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 218d5ea218eSStefano Zampini { 219d5ea218eSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 220d5ea218eSStefano Zampini PetscErrorCode ierr; 221d5ea218eSStefano Zampini 222d5ea218eSStefano Zampini PetscFunctionBegin; 223d5ea218eSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 224d5ea218eSStefano Zampini ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 225d5ea218eSStefano Zampini PetscFunctionReturn(0); 226d5ea218eSStefano Zampini } 227d5ea218eSStefano Zampini 2287dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 229ca3fa75bSLois Curfman McInnes { 230ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 2316849ba73SBarry Smith PetscErrorCode ierr; 232637a0070SStefano Zampini PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 2335d0c19d7SBarry Smith const PetscInt *irow,*icol; 234637a0070SStefano Zampini const PetscScalar *v; 235637a0070SStefano Zampini PetscScalar *bv; 236ca3fa75bSLois Curfman McInnes Mat newmat; 2374aa3045dSJed Brown IS iscol_local; 23842a884f0SBarry Smith MPI_Comm comm_is,comm_mat; 239ca3fa75bSLois Curfman McInnes 240ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 24142a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 24242a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 24342a884f0SBarry Smith if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 24442a884f0SBarry Smith 2454aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 246ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2474aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 248b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 249b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2504aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 251ca3fa75bSLois Curfman McInnes 252ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2537eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2547eba5e9cSLois Curfman McInnes original matrix! */ 255ca3fa75bSLois Curfman McInnes 256ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 2577eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 258ca3fa75bSLois Curfman McInnes 259ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 260ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 261e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2627eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 263ca3fa75bSLois Curfman McInnes newmat = *B; 264ca3fa75bSLois Curfman McInnes } else { 265ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 266ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 2674aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 2687adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2690298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 270ca3fa75bSLois Curfman McInnes } 271ca3fa75bSLois Curfman McInnes 272ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 273ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 274637a0070SStefano Zampini ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 275637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 276637a0070SStefano Zampini ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 2774aa3045dSJed Brown for (i=0; i<Ncols; i++) { 278637a0070SStefano Zampini const PetscScalar *av = v + lda*icol[i]; 279ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2807eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 281ca3fa75bSLois Curfman McInnes } 282ca3fa75bSLois Curfman McInnes } 283637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 284637a0070SStefano Zampini ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 285ca3fa75bSLois Curfman McInnes 286ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 287ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289ca3fa75bSLois Curfman McInnes 290ca3fa75bSLois Curfman McInnes /* Free work space */ 291ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 2925bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 29332bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 294ca3fa75bSLois Curfman McInnes *B = newmat; 295ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 296ca3fa75bSLois Curfman McInnes } 297ca3fa75bSLois Curfman McInnes 298637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 299ff14e315SSatish Balay { 30073a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 30173a71a0fSBarry Smith PetscErrorCode ierr; 30273a71a0fSBarry Smith 3033a40ed3dSBarry Smith PetscFunctionBegin; 3048c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 3053a40ed3dSBarry Smith PetscFunctionReturn(0); 306ff14e315SSatish Balay } 307ff14e315SSatish Balay 308637a0070SStefano Zampini PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 3098572280aSBarry Smith { 3108572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3118572280aSBarry Smith PetscErrorCode ierr; 3128572280aSBarry Smith 3138572280aSBarry Smith PetscFunctionBegin; 3148572280aSBarry Smith ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 3158572280aSBarry Smith PetscFunctionReturn(0); 3168572280aSBarry Smith } 3178572280aSBarry Smith 3186947451fSStefano Zampini PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 3196947451fSStefano Zampini { 3206947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 3216947451fSStefano Zampini PetscErrorCode ierr; 3226947451fSStefano Zampini 3236947451fSStefano Zampini PetscFunctionBegin; 3246947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 3256947451fSStefano Zampini PetscFunctionReturn(0); 3266947451fSStefano Zampini } 3276947451fSStefano Zampini 328dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 3298965ea79SLois Curfman McInnes { 33039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 331dfbe8321SBarry Smith PetscErrorCode ierr; 33213f74950SBarry Smith PetscInt nstash,reallocs; 3338965ea79SLois Curfman McInnes 3343a40ed3dSBarry Smith PetscFunctionBegin; 335910cf402Sprj- if (mdn->donotstash || mat->nooffprocentries) return(0); 3368965ea79SLois Curfman McInnes 337d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 3388798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 339ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 3403a40ed3dSBarry Smith PetscFunctionReturn(0); 3418965ea79SLois Curfman McInnes } 3428965ea79SLois Curfman McInnes 343dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 3448965ea79SLois Curfman McInnes { 34539ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 3466849ba73SBarry Smith PetscErrorCode ierr; 34713f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 34813f74950SBarry Smith PetscMPIInt n; 34987828ca2SBarry Smith PetscScalar *val; 3508965ea79SLois Curfman McInnes 3513a40ed3dSBarry Smith PetscFunctionBegin; 352910cf402Sprj- if (!mdn->donotstash && !mat->nooffprocentries) { 3538965ea79SLois Curfman McInnes /* wait on receives */ 3547ef1d9bdSSatish Balay while (1) { 3558798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 3567ef1d9bdSSatish Balay if (!flg) break; 3578965ea79SLois Curfman McInnes 3587ef1d9bdSSatish Balay for (i=0; i<n;) { 3597ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3602205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 3612205254eSKarl Rupp if (row[j] != rstart) break; 3622205254eSKarl Rupp } 3637ef1d9bdSSatish Balay if (j < n) ncols = j-i; 3647ef1d9bdSSatish Balay else ncols = n-i; 3657ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3664b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 3677ef1d9bdSSatish Balay i = j; 3688965ea79SLois Curfman McInnes } 3697ef1d9bdSSatish Balay } 3708798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 371910cf402Sprj- } 3728965ea79SLois Curfman McInnes 37339ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 37439ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 3758965ea79SLois Curfman McInnes 3766d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 37739ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 3788965ea79SLois Curfman McInnes } 3793a40ed3dSBarry Smith PetscFunctionReturn(0); 3808965ea79SLois Curfman McInnes } 3818965ea79SLois Curfman McInnes 382dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 3838965ea79SLois Curfman McInnes { 384dfbe8321SBarry Smith PetscErrorCode ierr; 38539ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3863a40ed3dSBarry Smith 3873a40ed3dSBarry Smith PetscFunctionBegin; 3883a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 3893a40ed3dSBarry Smith PetscFunctionReturn(0); 3908965ea79SLois Curfman McInnes } 3918965ea79SLois Curfman McInnes 392637a0070SStefano Zampini PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 3938965ea79SLois Curfman McInnes { 39439ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3956849ba73SBarry Smith PetscErrorCode ierr; 396637a0070SStefano Zampini PetscInt i,len,*lrows; 397637a0070SStefano Zampini 398637a0070SStefano Zampini PetscFunctionBegin; 399637a0070SStefano Zampini /* get locally owned rows */ 400637a0070SStefano Zampini ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 401637a0070SStefano Zampini /* fix right hand side if needed */ 402637a0070SStefano Zampini if (x && b) { 40397b48c8fSBarry Smith const PetscScalar *xx; 40497b48c8fSBarry Smith PetscScalar *bb; 4058965ea79SLois Curfman McInnes 40697b48c8fSBarry Smith ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 407637a0070SStefano Zampini ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 408637a0070SStefano Zampini for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 40997b48c8fSBarry Smith ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 410637a0070SStefano Zampini ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 41197b48c8fSBarry Smith } 412637a0070SStefano Zampini ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 413e2eb51b1SBarry Smith if (diag != 0.0) { 414637a0070SStefano Zampini Vec d; 415b9679d65SBarry Smith 416637a0070SStefano Zampini ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 417637a0070SStefano Zampini ierr = VecSet(d,diag);CHKERRQ(ierr); 418637a0070SStefano Zampini ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 419637a0070SStefano Zampini ierr = VecDestroy(&d);CHKERRQ(ierr); 420b9679d65SBarry Smith } 421606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4223a40ed3dSBarry Smith PetscFunctionReturn(0); 4238965ea79SLois Curfman McInnes } 4248965ea79SLois Curfman McInnes 425cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 426cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 427cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 428cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 429cc2e6a90SBarry Smith 430dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4318965ea79SLois Curfman McInnes { 43239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 433dfbe8321SBarry Smith PetscErrorCode ierr; 434637a0070SStefano Zampini const PetscScalar *ax; 435637a0070SStefano Zampini PetscScalar *ay; 436c456f294SBarry Smith 4373a40ed3dSBarry Smith PetscFunctionBegin; 438637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 439637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 440637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 441637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 442637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 443637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 444637a0070SStefano Zampini ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4453a40ed3dSBarry Smith PetscFunctionReturn(0); 4468965ea79SLois Curfman McInnes } 4478965ea79SLois Curfman McInnes 448dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4498965ea79SLois Curfman McInnes { 45039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 451dfbe8321SBarry Smith PetscErrorCode ierr; 452637a0070SStefano Zampini const PetscScalar *ax; 453637a0070SStefano Zampini PetscScalar *ay; 454c456f294SBarry Smith 4553a40ed3dSBarry Smith PetscFunctionBegin; 456637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 457637a0070SStefano Zampini ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 458637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 459637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 460637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 461637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 462637a0070SStefano Zampini ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4633a40ed3dSBarry Smith PetscFunctionReturn(0); 4648965ea79SLois Curfman McInnes } 4658965ea79SLois Curfman McInnes 466dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 467096963f5SLois Curfman McInnes { 468096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 469dfbe8321SBarry Smith PetscErrorCode ierr; 470637a0070SStefano Zampini const PetscScalar *ax; 471637a0070SStefano Zampini PetscScalar *ay; 472096963f5SLois Curfman McInnes 4733a40ed3dSBarry Smith PetscFunctionBegin; 474637a0070SStefano Zampini ierr = VecSet(yy,0.0);CHKERRQ(ierr); 475637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 476637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 477637a0070SStefano Zampini ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 478637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 479637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 480637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 481637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 4823a40ed3dSBarry Smith PetscFunctionReturn(0); 483096963f5SLois Curfman McInnes } 484096963f5SLois Curfman McInnes 485dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 486096963f5SLois Curfman McInnes { 487096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 488dfbe8321SBarry Smith PetscErrorCode ierr; 489637a0070SStefano Zampini const PetscScalar *ax; 490637a0070SStefano Zampini PetscScalar *ay; 491096963f5SLois Curfman McInnes 4923a40ed3dSBarry Smith PetscFunctionBegin; 4933501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 494637a0070SStefano Zampini ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 495637a0070SStefano Zampini ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 496637a0070SStefano Zampini ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 497637a0070SStefano Zampini ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 498637a0070SStefano Zampini ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 499637a0070SStefano Zampini ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 500637a0070SStefano Zampini ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 5013a40ed3dSBarry Smith PetscFunctionReturn(0); 502096963f5SLois Curfman McInnes } 503096963f5SLois Curfman McInnes 504dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 5058965ea79SLois Curfman McInnes { 50639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507dfbe8321SBarry Smith PetscErrorCode ierr; 508637a0070SStefano Zampini PetscInt lda,len,i,n,m = A->rmap->n,radd; 50987828ca2SBarry Smith PetscScalar *x,zero = 0.0; 510637a0070SStefano Zampini const PetscScalar *av; 511ed3cc1f0SBarry Smith 5123a40ed3dSBarry Smith PetscFunctionBegin; 5132dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 5141ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 515096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 516e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 517d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 518d0f46423SBarry Smith radd = A->rmap->rstart*m; 519637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 520637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 52144cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 522637a0070SStefano Zampini x[i] = av[radd + i*lda + i]; 523096963f5SLois Curfman McInnes } 524637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 5251ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5263a40ed3dSBarry Smith PetscFunctionReturn(0); 5278965ea79SLois Curfman McInnes } 5288965ea79SLois Curfman McInnes 529dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5308965ea79SLois Curfman McInnes { 5313501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 532dfbe8321SBarry Smith PetscErrorCode ierr; 533ed3cc1f0SBarry Smith 5343a40ed3dSBarry Smith PetscFunctionBegin; 535aa482453SBarry Smith #if defined(PETSC_USE_LOG) 536d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5378965ea79SLois Curfman McInnes #endif 5388798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 5396bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5406bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 541637a0070SStefano Zampini ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 5426947451fSStefano Zampini ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 54301b82886SBarry Smith 544bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 545dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5468baccfbdSHong Zhang 54749a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 5488baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5498572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5508572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 5518572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 5526947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 5536947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 554d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 555d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 556d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 5578baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5588baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5598baccfbdSHong Zhang #endif 560bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 5614222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5624222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 563bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 564bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 56552c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr); 56652c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr); 5678baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5688baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 56986aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 57086aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 5716947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 5726947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 5736947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 5746947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 5756947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 5766947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 5773a40ed3dSBarry Smith PetscFunctionReturn(0); 5788965ea79SLois Curfman McInnes } 57939ddd567SLois Curfman McInnes 58052c5f739Sprj- PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 58152c5f739Sprj- 5829804daf3SBarry Smith #include <petscdraw.h> 5836849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 5848965ea79SLois Curfman McInnes { 58539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 586dfbe8321SBarry Smith PetscErrorCode ierr; 5877da1fb6eSBarry Smith PetscMPIInt rank = mdn->rank; 58819fd82e9SBarry Smith PetscViewerType vtype; 589ace3abfcSBarry Smith PetscBool iascii,isdraw; 590b0a32e0cSBarry Smith PetscViewer sviewer; 591f3ef73ceSBarry Smith PetscViewerFormat format; 5928965ea79SLois Curfman McInnes 5933a40ed3dSBarry Smith PetscFunctionBegin; 594251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 595251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 59632077d6dSBarry Smith if (iascii) { 597b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 598b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 599456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6004e220ebcSLois Curfman McInnes MatInfo info; 601888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 6021575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 6037b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 604b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6051575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 606637a0070SStefano Zampini ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 6073a40ed3dSBarry Smith PetscFunctionReturn(0); 608fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 6093a40ed3dSBarry Smith PetscFunctionReturn(0); 6108965ea79SLois Curfman McInnes } 611f1af5d2fSBarry Smith } else if (isdraw) { 612b0a32e0cSBarry Smith PetscDraw draw; 613ace3abfcSBarry Smith PetscBool isnull; 614f1af5d2fSBarry Smith 615b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 616b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 617f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 618f1af5d2fSBarry Smith } 61977ed5343SBarry Smith 6207da1fb6eSBarry Smith { 6218965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6228965ea79SLois Curfman McInnes Mat A; 623d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 624ba8c8a56SBarry Smith PetscInt *cols; 625ba8c8a56SBarry Smith PetscScalar *vals; 6268965ea79SLois Curfman McInnes 627ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 6288965ea79SLois Curfman McInnes if (!rank) { 629f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 6303a40ed3dSBarry Smith } else { 631f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 6328965ea79SLois Curfman McInnes } 6337adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 634878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 6350298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 6363bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 6378965ea79SLois Curfman McInnes 63839ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 63939ddd567SLois Curfman McInnes but it's quick for now */ 64051022da4SBarry Smith A->insertmode = INSERT_VALUES; 6412205254eSKarl Rupp 6422205254eSKarl Rupp row = mat->rmap->rstart; 6432205254eSKarl Rupp m = mdn->A->rmap->n; 6448965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 645ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 646ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 647ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 64839ddd567SLois Curfman McInnes row++; 6498965ea79SLois Curfman McInnes } 6508965ea79SLois Curfman McInnes 6516d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6526d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6533f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 654b9b97703SBarry Smith if (!rank) { 6551a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 6567da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 6578965ea79SLois Curfman McInnes } 6583f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 659b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6606bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 6618965ea79SLois Curfman McInnes } 6623a40ed3dSBarry Smith PetscFunctionReturn(0); 6638965ea79SLois Curfman McInnes } 6648965ea79SLois Curfman McInnes 665dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 6668965ea79SLois Curfman McInnes { 667dfbe8321SBarry Smith PetscErrorCode ierr; 668ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 6698965ea79SLois Curfman McInnes 670433994e6SBarry Smith PetscFunctionBegin; 671251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 672251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 673251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 674251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 6750f5bd95cSBarry Smith 67632077d6dSBarry Smith if (iascii || issocket || isdraw) { 677f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 6780f5bd95cSBarry Smith } else if (isbinary) { 6798491ab44SLisandro Dalcin ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 68011aeaf0aSBarry Smith } 6813a40ed3dSBarry Smith PetscFunctionReturn(0); 6828965ea79SLois Curfman McInnes } 6838965ea79SLois Curfman McInnes 684dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 6858965ea79SLois Curfman McInnes { 6863501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 6873501a2bdSLois Curfman McInnes Mat mdn = mat->A; 688dfbe8321SBarry Smith PetscErrorCode ierr; 6893966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 6908965ea79SLois Curfman McInnes 6913a40ed3dSBarry Smith PetscFunctionBegin; 6924e220ebcSLois Curfman McInnes info->block_size = 1.0; 6932205254eSKarl Rupp 6944e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 6952205254eSKarl Rupp 6964e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 6974e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 6988965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 6994e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7004e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7014e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7024e220ebcSLois Curfman McInnes info->memory = isend[3]; 7034e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7048965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 7053966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7062205254eSKarl Rupp 7074e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7084e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7094e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7104e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7114e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7128965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 7133966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7142205254eSKarl Rupp 7154e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7164e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7174e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7184e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7194e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7208965ea79SLois Curfman McInnes } 7214e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7224e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7234e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7243a40ed3dSBarry Smith PetscFunctionReturn(0); 7258965ea79SLois Curfman McInnes } 7268965ea79SLois Curfman McInnes 727ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 7288965ea79SLois Curfman McInnes { 72939ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 730dfbe8321SBarry Smith PetscErrorCode ierr; 7318965ea79SLois Curfman McInnes 7323a40ed3dSBarry Smith PetscFunctionBegin; 73312c028f9SKris Buschelman switch (op) { 734512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 73512c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 73612c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 73743674050SBarry Smith MatCheckPreallocated(A,1); 7384e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 73912c028f9SKris Buschelman break; 74012c028f9SKris Buschelman case MAT_ROW_ORIENTED: 74143674050SBarry Smith MatCheckPreallocated(A,1); 7424e0d8c25SBarry Smith a->roworiented = flg; 7434e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 74412c028f9SKris Buschelman break; 7454e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 74613fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 74712c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 748071fcb05SBarry Smith case MAT_SORTED_FULL: 749290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 75012c028f9SKris Buschelman break; 75112c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 7524e0d8c25SBarry Smith a->donotstash = flg; 75312c028f9SKris Buschelman break; 75477e54ba9SKris Buschelman case MAT_SYMMETRIC: 75577e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7569a4540c5SBarry Smith case MAT_HERMITIAN: 7579a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 758600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 7595d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 760290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 76177e54ba9SKris Buschelman break; 76212c028f9SKris Buschelman default: 763e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 7643a40ed3dSBarry Smith } 7653a40ed3dSBarry Smith PetscFunctionReturn(0); 7668965ea79SLois Curfman McInnes } 7678965ea79SLois Curfman McInnes 768dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 7695b2fa520SLois Curfman McInnes { 7705b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 771637a0070SStefano Zampini const PetscScalar *l; 772637a0070SStefano Zampini PetscScalar x,*v,*vv,*r; 773dfbe8321SBarry Smith PetscErrorCode ierr; 774637a0070SStefano Zampini PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 7755b2fa520SLois Curfman McInnes 7765b2fa520SLois Curfman McInnes PetscFunctionBegin; 777637a0070SStefano Zampini ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 778637a0070SStefano Zampini ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 77972d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 7805b2fa520SLois Curfman McInnes if (ll) { 78172d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 782637a0070SStefano Zampini if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2); 783bca11509SBarry Smith ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 7845b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 7855b2fa520SLois Curfman McInnes x = l[i]; 786637a0070SStefano Zampini v = vv + i; 787637a0070SStefano Zampini for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 7885b2fa520SLois Curfman McInnes } 789bca11509SBarry Smith ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 790637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 7915b2fa520SLois Curfman McInnes } 7925b2fa520SLois Curfman McInnes if (rr) { 793637a0070SStefano Zampini const PetscScalar *ar; 794637a0070SStefano Zampini 795175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 796e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 797637a0070SStefano Zampini ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 798637a0070SStefano Zampini ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 799637a0070SStefano Zampini ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 800637a0070SStefano Zampini ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 801637a0070SStefano Zampini ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 8025b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 8035b2fa520SLois Curfman McInnes x = r[i]; 804637a0070SStefano Zampini v = vv + i*lda; 8052205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 8065b2fa520SLois Curfman McInnes } 807637a0070SStefano Zampini ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 808637a0070SStefano Zampini ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 8095b2fa520SLois Curfman McInnes } 810637a0070SStefano Zampini ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 8115b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8125b2fa520SLois Curfman McInnes } 8135b2fa520SLois Curfman McInnes 814dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 815096963f5SLois Curfman McInnes { 8163501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 817dfbe8321SBarry Smith PetscErrorCode ierr; 81813f74950SBarry Smith PetscInt i,j; 819329f5518SBarry Smith PetscReal sum = 0.0; 820637a0070SStefano Zampini const PetscScalar *av,*v; 8213501a2bdSLois Curfman McInnes 8223a40ed3dSBarry Smith PetscFunctionBegin; 823637a0070SStefano Zampini ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 824637a0070SStefano Zampini v = av; 8253501a2bdSLois Curfman McInnes if (mdn->size == 1) { 826064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 8273501a2bdSLois Curfman McInnes } else { 8283501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 829d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 830329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 8313501a2bdSLois Curfman McInnes } 832b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8338f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 834dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 8353a40ed3dSBarry Smith } else if (type == NORM_1) { 836329f5518SBarry Smith PetscReal *tmp,*tmp2; 837580bdb30SBarry Smith ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 838064f8208SBarry Smith *nrm = 0.0; 839637a0070SStefano Zampini v = av; 840d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 841d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 84267e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8433501a2bdSLois Curfman McInnes } 8443501a2bdSLois Curfman McInnes } 845b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 846d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 847064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8483501a2bdSLois Curfman McInnes } 8498627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 850d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 8513a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 852329f5518SBarry Smith PetscReal ntemp; 8533501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 854b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 855ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 8563501a2bdSLois Curfman McInnes } 857637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr); 8583a40ed3dSBarry Smith PetscFunctionReturn(0); 8593501a2bdSLois Curfman McInnes } 8603501a2bdSLois Curfman McInnes 861fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 8623501a2bdSLois Curfman McInnes { 8633501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 8643501a2bdSLois Curfman McInnes Mat B; 865d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 8666849ba73SBarry Smith PetscErrorCode ierr; 867637a0070SStefano Zampini PetscInt j,i,lda; 86887828ca2SBarry Smith PetscScalar *v; 8693501a2bdSLois Curfman McInnes 8703a40ed3dSBarry Smith PetscFunctionBegin; 871cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 872ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 873d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 8747adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 8750298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 876637a0070SStefano Zampini } else B = *matout; 8773501a2bdSLois Curfman McInnes 878637a0070SStefano Zampini m = a->A->rmap->n; n = a->A->cmap->n; 879637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 880637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 881785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 8823501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 8831acff37aSSatish Balay for (j=0; j<n; j++) { 8843501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 885637a0070SStefano Zampini v += lda; 8863501a2bdSLois Curfman McInnes } 887637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr); 888606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 8896d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8906d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 891cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 8923501a2bdSLois Curfman McInnes *matout = B; 8933501a2bdSLois Curfman McInnes } else { 89428be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 8953501a2bdSLois Curfman McInnes } 8963a40ed3dSBarry Smith PetscFunctionReturn(0); 897096963f5SLois Curfman McInnes } 898096963f5SLois Curfman McInnes 8996849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 90052c5f739Sprj- PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 9018965ea79SLois Curfman McInnes 9024994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 903273d9f13SBarry Smith { 904dfbe8321SBarry Smith PetscErrorCode ierr; 905273d9f13SBarry Smith 906273d9f13SBarry Smith PetscFunctionBegin; 90718992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 90818992e5dSStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 90918992e5dSStefano Zampini if (!A->preallocated) { 910273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 91118992e5dSStefano Zampini } 912273d9f13SBarry Smith PetscFunctionReturn(0); 913273d9f13SBarry Smith } 914273d9f13SBarry Smith 915488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 916488007eeSBarry Smith { 917488007eeSBarry Smith PetscErrorCode ierr; 918488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 919488007eeSBarry Smith 920488007eeSBarry Smith PetscFunctionBegin; 921488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 922488007eeSBarry Smith PetscFunctionReturn(0); 923488007eeSBarry Smith } 924488007eeSBarry Smith 9257087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 926ba337c44SJed Brown { 927ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 928ba337c44SJed Brown PetscErrorCode ierr; 929ba337c44SJed Brown 930ba337c44SJed Brown PetscFunctionBegin; 931ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 932ba337c44SJed Brown PetscFunctionReturn(0); 933ba337c44SJed Brown } 934ba337c44SJed Brown 935ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 936ba337c44SJed Brown { 937ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 938ba337c44SJed Brown PetscErrorCode ierr; 939ba337c44SJed Brown 940ba337c44SJed Brown PetscFunctionBegin; 941ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 942ba337c44SJed Brown PetscFunctionReturn(0); 943ba337c44SJed Brown } 944ba337c44SJed Brown 945ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 946ba337c44SJed Brown { 947ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 948ba337c44SJed Brown PetscErrorCode ierr; 949ba337c44SJed Brown 950ba337c44SJed Brown PetscFunctionBegin; 951ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 952ba337c44SJed Brown PetscFunctionReturn(0); 953ba337c44SJed Brown } 954ba337c44SJed Brown 95549a6ff4bSBarry Smith static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 95649a6ff4bSBarry Smith { 95749a6ff4bSBarry Smith PetscErrorCode ierr; 958637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*) A->data; 95949a6ff4bSBarry Smith 96049a6ff4bSBarry Smith PetscFunctionBegin; 961637a0070SStefano Zampini if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 962637a0070SStefano Zampini if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 963637a0070SStefano Zampini ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr); 96449a6ff4bSBarry Smith PetscFunctionReturn(0); 96549a6ff4bSBarry Smith } 96649a6ff4bSBarry Smith 96752c5f739Sprj- PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 96852c5f739Sprj- 9690716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 9700716a85fSBarry Smith { 9710716a85fSBarry Smith PetscErrorCode ierr; 9720716a85fSBarry Smith PetscInt i,n; 9730716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 9740716a85fSBarry Smith PetscReal *work; 9750716a85fSBarry Smith 9760716a85fSBarry Smith PetscFunctionBegin; 9770298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 978785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 9790716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 9800716a85fSBarry Smith if (type == NORM_2) { 9810716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 9820716a85fSBarry Smith } 9830716a85fSBarry Smith if (type == NORM_INFINITY) { 984b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 9850716a85fSBarry Smith } else { 986b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 9870716a85fSBarry Smith } 9880716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 9890716a85fSBarry Smith if (type == NORM_2) { 9908f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 9910716a85fSBarry Smith } 9920716a85fSBarry Smith PetscFunctionReturn(0); 9930716a85fSBarry Smith } 9940716a85fSBarry Smith 995637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 9966947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 9976947451fSStefano Zampini { 9986947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 9996947451fSStefano Zampini PetscErrorCode ierr; 10006947451fSStefano Zampini PetscInt lda; 10016947451fSStefano Zampini 10026947451fSStefano Zampini PetscFunctionBegin; 10036947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 10046947451fSStefano Zampini if (!a->cvec) { 10056947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 10066947451fSStefano Zampini } 10076947451fSStefano Zampini a->vecinuse = col + 1; 10086947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10096947451fSStefano Zampini ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10106947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10116947451fSStefano Zampini *v = a->cvec; 10126947451fSStefano Zampini PetscFunctionReturn(0); 10136947451fSStefano Zampini } 10146947451fSStefano Zampini 10156947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10166947451fSStefano Zampini { 10176947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10186947451fSStefano Zampini PetscErrorCode ierr; 10196947451fSStefano Zampini 10206947451fSStefano Zampini PetscFunctionBegin; 10216947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 10226947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 10236947451fSStefano Zampini a->vecinuse = 0; 10246947451fSStefano Zampini ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10256947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10266947451fSStefano Zampini *v = NULL; 10276947451fSStefano Zampini PetscFunctionReturn(0); 10286947451fSStefano Zampini } 10296947451fSStefano Zampini 10306947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10316947451fSStefano Zampini { 10326947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10336947451fSStefano Zampini PetscInt lda; 10346947451fSStefano Zampini PetscErrorCode ierr; 10356947451fSStefano Zampini 10366947451fSStefano Zampini PetscFunctionBegin; 10376947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 10386947451fSStefano Zampini if (!a->cvec) { 10396947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 10406947451fSStefano Zampini } 10416947451fSStefano Zampini a->vecinuse = col + 1; 10426947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10436947451fSStefano Zampini ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10446947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10456947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 10466947451fSStefano Zampini *v = a->cvec; 10476947451fSStefano Zampini PetscFunctionReturn(0); 10486947451fSStefano Zampini } 10496947451fSStefano Zampini 10506947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10516947451fSStefano Zampini { 10526947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10536947451fSStefano Zampini PetscErrorCode ierr; 10546947451fSStefano Zampini 10556947451fSStefano Zampini PetscFunctionBegin; 10566947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 10576947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 10586947451fSStefano Zampini a->vecinuse = 0; 10596947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 10606947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 10616947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10626947451fSStefano Zampini *v = NULL; 10636947451fSStefano Zampini PetscFunctionReturn(0); 10646947451fSStefano Zampini } 10656947451fSStefano Zampini 10666947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10676947451fSStefano Zampini { 10686947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10696947451fSStefano Zampini PetscErrorCode ierr; 10706947451fSStefano Zampini PetscInt lda; 10716947451fSStefano Zampini 10726947451fSStefano Zampini PetscFunctionBegin; 10736947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 10746947451fSStefano Zampini if (!a->cvec) { 10756947451fSStefano Zampini ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 10766947451fSStefano Zampini } 10776947451fSStefano Zampini a->vecinuse = col + 1; 10786947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 10796947451fSStefano Zampini ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10806947451fSStefano Zampini ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 10816947451fSStefano Zampini *v = a->cvec; 10826947451fSStefano Zampini PetscFunctionReturn(0); 10836947451fSStefano Zampini } 10846947451fSStefano Zampini 10856947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 10866947451fSStefano Zampini { 10876947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 10886947451fSStefano Zampini PetscErrorCode ierr; 10896947451fSStefano Zampini 10906947451fSStefano Zampini PetscFunctionBegin; 10916947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 10926947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 10936947451fSStefano Zampini a->vecinuse = 0; 10946947451fSStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 10956947451fSStefano Zampini ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr); 10966947451fSStefano Zampini *v = NULL; 10976947451fSStefano Zampini PetscFunctionReturn(0); 10986947451fSStefano Zampini } 10996947451fSStefano Zampini 1100637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1101637a0070SStefano Zampini { 1102637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1103637a0070SStefano Zampini PetscErrorCode ierr; 1104637a0070SStefano Zampini 1105637a0070SStefano Zampini PetscFunctionBegin; 11066947451fSStefano Zampini if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1107637a0070SStefano Zampini ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr); 1108637a0070SStefano Zampini PetscFunctionReturn(0); 1109637a0070SStefano Zampini } 1110637a0070SStefano Zampini 1111637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1112637a0070SStefano Zampini { 1113637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1114637a0070SStefano Zampini PetscErrorCode ierr; 1115637a0070SStefano Zampini 1116637a0070SStefano Zampini PetscFunctionBegin; 11176947451fSStefano Zampini if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1118637a0070SStefano Zampini ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr); 1119637a0070SStefano Zampini PetscFunctionReturn(0); 1120637a0070SStefano Zampini } 1121637a0070SStefano Zampini 1122d5ea218eSStefano Zampini static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1123d5ea218eSStefano Zampini { 1124d5ea218eSStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1125d5ea218eSStefano Zampini PetscErrorCode ierr; 1126d5ea218eSStefano Zampini 1127d5ea218eSStefano Zampini PetscFunctionBegin; 1128d5ea218eSStefano Zampini if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1129d5ea218eSStefano Zampini ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); 1130d5ea218eSStefano Zampini PetscFunctionReturn(0); 1131d5ea218eSStefano Zampini } 1132d5ea218eSStefano Zampini 1133637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1134637a0070SStefano Zampini { 1135637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1136637a0070SStefano Zampini PetscErrorCode ierr; 1137637a0070SStefano Zampini 1138637a0070SStefano Zampini PetscFunctionBegin; 1139637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr); 1140637a0070SStefano Zampini PetscFunctionReturn(0); 1141637a0070SStefano Zampini } 1142637a0070SStefano Zampini 1143637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1144637a0070SStefano Zampini { 1145637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1146637a0070SStefano Zampini PetscErrorCode ierr; 1147637a0070SStefano Zampini 1148637a0070SStefano Zampini PetscFunctionBegin; 1149637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr); 1150637a0070SStefano Zampini PetscFunctionReturn(0); 1151637a0070SStefano Zampini } 1152637a0070SStefano Zampini 1153637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1154637a0070SStefano Zampini { 1155637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1156637a0070SStefano Zampini PetscErrorCode ierr; 1157637a0070SStefano Zampini 1158637a0070SStefano Zampini PetscFunctionBegin; 1159637a0070SStefano Zampini ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr); 1160637a0070SStefano Zampini PetscFunctionReturn(0); 1161637a0070SStefano Zampini } 1162637a0070SStefano Zampini 1163637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1164637a0070SStefano Zampini { 1165637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1166637a0070SStefano Zampini PetscErrorCode ierr; 1167637a0070SStefano Zampini 1168637a0070SStefano Zampini PetscFunctionBegin; 1169637a0070SStefano Zampini ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr); 1170637a0070SStefano Zampini PetscFunctionReturn(0); 1171637a0070SStefano Zampini } 1172637a0070SStefano Zampini 1173637a0070SStefano Zampini static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1174637a0070SStefano Zampini { 1175637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1176637a0070SStefano Zampini PetscErrorCode ierr; 1177637a0070SStefano Zampini 1178637a0070SStefano Zampini PetscFunctionBegin; 1179637a0070SStefano Zampini ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr); 1180637a0070SStefano Zampini PetscFunctionReturn(0); 1181637a0070SStefano Zampini } 1182637a0070SStefano Zampini 1183637a0070SStefano Zampini static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1184637a0070SStefano Zampini { 1185637a0070SStefano Zampini Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1186637a0070SStefano Zampini PetscErrorCode ierr; 1187637a0070SStefano Zampini 1188637a0070SStefano Zampini PetscFunctionBegin; 1189637a0070SStefano Zampini ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr); 1190637a0070SStefano Zampini PetscFunctionReturn(0); 1191637a0070SStefano Zampini } 1192637a0070SStefano Zampini 11936947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 11946947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 11956947451fSStefano Zampini static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 11966947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 11976947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 11986947451fSStefano Zampini static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 11996947451fSStefano Zampini 1200637a0070SStefano Zampini static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1201637a0070SStefano Zampini { 1202637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1203637a0070SStefano Zampini PetscErrorCode ierr; 1204637a0070SStefano Zampini 1205637a0070SStefano Zampini PetscFunctionBegin; 12066947451fSStefano Zampini if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 1207637a0070SStefano Zampini if (d->A) { 1208637a0070SStefano Zampini ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr); 1209637a0070SStefano Zampini } 1210637a0070SStefano Zampini mat->boundtocpu = bind; 12116947451fSStefano Zampini if (!bind) { 12126947451fSStefano Zampini PetscBool iscuda; 12136947451fSStefano Zampini 12146947451fSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr); 12156947451fSStefano Zampini if (!iscuda) { 12166947451fSStefano Zampini ierr = VecDestroy(&d->cvec);CHKERRQ(ierr); 12176947451fSStefano Zampini } 12186947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12196947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr); 12206947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12216947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr); 12226947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12236947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr); 12246947451fSStefano Zampini } else { 12256947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 12266947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 12276947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 12286947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 12296947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 12306947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 12316947451fSStefano Zampini } 1232637a0070SStefano Zampini PetscFunctionReturn(0); 1233637a0070SStefano Zampini } 1234637a0070SStefano Zampini 1235637a0070SStefano Zampini PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1236637a0070SStefano Zampini { 1237637a0070SStefano Zampini Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1238637a0070SStefano Zampini PetscErrorCode ierr; 1239637a0070SStefano Zampini PetscBool iscuda; 1240637a0070SStefano Zampini 1241637a0070SStefano Zampini PetscFunctionBegin; 1242d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1243637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1244637a0070SStefano Zampini if (!iscuda) PetscFunctionReturn(0); 1245637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 1246637a0070SStefano Zampini ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 1247637a0070SStefano Zampini if (!d->A) { 1248637a0070SStefano Zampini ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr); 1249637a0070SStefano Zampini ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr); 1250637a0070SStefano Zampini ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr); 1251637a0070SStefano Zampini } 1252637a0070SStefano Zampini ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr); 1253637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr); 1254637a0070SStefano Zampini A->preallocated = PETSC_TRUE; 1255637a0070SStefano Zampini PetscFunctionReturn(0); 1256637a0070SStefano Zampini } 1257637a0070SStefano Zampini #endif 1258637a0070SStefano Zampini 125973a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 126073a71a0fSBarry Smith { 126173a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 126273a71a0fSBarry Smith PetscErrorCode ierr; 126373a71a0fSBarry Smith 126473a71a0fSBarry Smith PetscFunctionBegin; 1265637a0070SStefano Zampini ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr); 126673a71a0fSBarry Smith PetscFunctionReturn(0); 126773a71a0fSBarry Smith } 126873a71a0fSBarry Smith 126952c5f739Sprj- PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1270fd4e9aacSBarry Smith 12713b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 12723b49f96aSBarry Smith { 12733b49f96aSBarry Smith PetscFunctionBegin; 12743b49f96aSBarry Smith *missing = PETSC_FALSE; 12753b49f96aSBarry Smith PetscFunctionReturn(0); 12763b49f96aSBarry Smith } 12773b49f96aSBarry Smith 12784222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1279cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1280cc48ffa7SToby Isaac 12818965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 128209dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 128309dc0095SBarry Smith MatGetRow_MPIDense, 128409dc0095SBarry Smith MatRestoreRow_MPIDense, 128509dc0095SBarry Smith MatMult_MPIDense, 128697304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 12877c922b88SBarry Smith MatMultTranspose_MPIDense, 12887c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 12898965ea79SLois Curfman McInnes 0, 129009dc0095SBarry Smith 0, 129109dc0095SBarry Smith 0, 129297304618SKris Buschelman /* 10*/ 0, 129309dc0095SBarry Smith 0, 129409dc0095SBarry Smith 0, 129509dc0095SBarry Smith 0, 129609dc0095SBarry Smith MatTranspose_MPIDense, 129797304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 12986e4ee0c6SHong Zhang MatEqual_MPIDense, 129909dc0095SBarry Smith MatGetDiagonal_MPIDense, 13005b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 130109dc0095SBarry Smith MatNorm_MPIDense, 130297304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 130309dc0095SBarry Smith MatAssemblyEnd_MPIDense, 130409dc0095SBarry Smith MatSetOption_MPIDense, 130509dc0095SBarry Smith MatZeroEntries_MPIDense, 1306d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1307919b68f7SBarry Smith 0, 130801b82886SBarry Smith 0, 130901b82886SBarry Smith 0, 131001b82886SBarry Smith 0, 13114994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1312273d9f13SBarry Smith 0, 131309dc0095SBarry Smith 0, 1314c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 13158c778c55SBarry Smith 0, 1316d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 131709dc0095SBarry Smith 0, 131809dc0095SBarry Smith 0, 131909dc0095SBarry Smith 0, 132009dc0095SBarry Smith 0, 1321d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 13227dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 132309dc0095SBarry Smith 0, 132409dc0095SBarry Smith MatGetValues_MPIDense, 132509dc0095SBarry Smith 0, 1326d519adbfSMatthew Knepley /* 44*/ 0, 132709dc0095SBarry Smith MatScale_MPIDense, 13287d68702bSBarry Smith MatShift_Basic, 132909dc0095SBarry Smith 0, 133009dc0095SBarry Smith 0, 133173a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 133209dc0095SBarry Smith 0, 133309dc0095SBarry Smith 0, 133409dc0095SBarry Smith 0, 133509dc0095SBarry Smith 0, 1336d519adbfSMatthew Knepley /* 54*/ 0, 133709dc0095SBarry Smith 0, 133809dc0095SBarry Smith 0, 133909dc0095SBarry Smith 0, 134009dc0095SBarry Smith 0, 13417dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1342b9b97703SBarry Smith MatDestroy_MPIDense, 1343b9b97703SBarry Smith MatView_MPIDense, 1344357abbc8SBarry Smith 0, 134597304618SKris Buschelman 0, 1346d519adbfSMatthew Knepley /* 64*/ 0, 134797304618SKris Buschelman 0, 134897304618SKris Buschelman 0, 134997304618SKris Buschelman 0, 135097304618SKris Buschelman 0, 1351d519adbfSMatthew Knepley /* 69*/ 0, 135297304618SKris Buschelman 0, 135397304618SKris Buschelman 0, 135497304618SKris Buschelman 0, 135597304618SKris Buschelman 0, 1356d519adbfSMatthew Knepley /* 74*/ 0, 135797304618SKris Buschelman 0, 135897304618SKris Buschelman 0, 135997304618SKris Buschelman 0, 136097304618SKris Buschelman 0, 1361d519adbfSMatthew Knepley /* 79*/ 0, 136297304618SKris Buschelman 0, 136397304618SKris Buschelman 0, 136497304618SKris Buschelman 0, 13655bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1366865e5f61SKris Buschelman 0, 1367865e5f61SKris Buschelman 0, 1368865e5f61SKris Buschelman 0, 1369865e5f61SKris Buschelman 0, 1370865e5f61SKris Buschelman 0, 13714222ddf1SHong Zhang /* 89*/ 0, 13724222ddf1SHong Zhang 0, 1373fd4e9aacSBarry Smith MatMatMultNumeric_MPIDense, 13742fbe02b9SBarry Smith 0, 1375ba337c44SJed Brown 0, 1376d519adbfSMatthew Knepley /* 94*/ 0, 13774222ddf1SHong Zhang 0, 13784222ddf1SHong Zhang 0, 1379cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1380ba337c44SJed Brown 0, 13814222ddf1SHong Zhang /* 99*/ MatProductSetFromOptions_MPIDense, 1382ba337c44SJed Brown 0, 1383ba337c44SJed Brown 0, 1384ba337c44SJed Brown MatConjugate_MPIDense, 1385ba337c44SJed Brown 0, 1386ba337c44SJed Brown /*104*/ 0, 1387ba337c44SJed Brown MatRealPart_MPIDense, 1388ba337c44SJed Brown MatImaginaryPart_MPIDense, 138986d161a7SShri Abhyankar 0, 139086d161a7SShri Abhyankar 0, 139186d161a7SShri Abhyankar /*109*/ 0, 139286d161a7SShri Abhyankar 0, 139386d161a7SShri Abhyankar 0, 139449a6ff4bSBarry Smith MatGetColumnVector_MPIDense, 13953b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 139686d161a7SShri Abhyankar /*114*/ 0, 139786d161a7SShri Abhyankar 0, 139886d161a7SShri Abhyankar 0, 139986d161a7SShri Abhyankar 0, 140086d161a7SShri Abhyankar 0, 140186d161a7SShri Abhyankar /*119*/ 0, 140286d161a7SShri Abhyankar 0, 140386d161a7SShri Abhyankar 0, 14040716a85fSBarry Smith 0, 14050716a85fSBarry Smith 0, 14060716a85fSBarry Smith /*124*/ 0, 14073964eb88SJed Brown MatGetColumnNorms_MPIDense, 14083964eb88SJed Brown 0, 14093964eb88SJed Brown 0, 14103964eb88SJed Brown 0, 14113964eb88SJed Brown /*129*/ 0, 14124222ddf1SHong Zhang 0, 14134222ddf1SHong Zhang 0, 1414cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 14153964eb88SJed Brown 0, 14163964eb88SJed Brown /*134*/ 0, 14173964eb88SJed Brown 0, 14183964eb88SJed Brown 0, 14193964eb88SJed Brown 0, 14203964eb88SJed Brown 0, 14213964eb88SJed Brown /*139*/ 0, 1422f9426fe0SMark Adams 0, 142394e2cb23SJakub Kruzik 0, 142494e2cb23SJakub Kruzik 0, 142594e2cb23SJakub Kruzik 0, 14264222ddf1SHong Zhang MatCreateMPIMatConcatenateSeqMat_MPIDense, 14274222ddf1SHong Zhang /*145*/ 0, 14284222ddf1SHong Zhang 0, 14294222ddf1SHong Zhang 0 1430ba337c44SJed Brown }; 14318965ea79SLois Curfman McInnes 14327087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1433a23d5eceSKris Buschelman { 1434637a0070SStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1435637a0070SStefano Zampini PetscBool iscuda; 1436dfbe8321SBarry Smith PetscErrorCode ierr; 1437a23d5eceSKris Buschelman 1438a23d5eceSKris Buschelman PetscFunctionBegin; 143934ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 144034ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 1441637a0070SStefano Zampini if (!a->A) { 1442f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 14433bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1444637a0070SStefano Zampini ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 1445637a0070SStefano Zampini } 1446637a0070SStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr); 1447637a0070SStefano Zampini ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr); 1448637a0070SStefano Zampini ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 1449637a0070SStefano Zampini mat->preallocated = PETSC_TRUE; 1450a23d5eceSKris Buschelman PetscFunctionReturn(0); 1451a23d5eceSKris Buschelman } 1452a23d5eceSKris Buschelman 145365b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1454cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 14558baccfbdSHong Zhang { 14568ea901baSHong Zhang Mat mat_elemental; 14578ea901baSHong Zhang PetscErrorCode ierr; 145832d7a744SHong Zhang PetscScalar *v; 145932d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 14608ea901baSHong Zhang 14618baccfbdSHong Zhang PetscFunctionBegin; 1462378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1463378336b6SHong Zhang mat_elemental = *newmat; 1464378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1465378336b6SHong Zhang } else { 1466378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1467378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1468378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1469378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 147032d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1471378336b6SHong Zhang } 1472378336b6SHong Zhang 147332d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 147432d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 147532d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 14768ea901baSHong Zhang 1477637a0070SStefano Zampini /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 147832d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 147932d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 14808ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14818ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 148232d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 148332d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 14848ea901baSHong Zhang 1485511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 148628be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 14878ea901baSHong Zhang } else { 14888ea901baSHong Zhang *newmat = mat_elemental; 14898ea901baSHong Zhang } 14908baccfbdSHong Zhang PetscFunctionReturn(0); 14918baccfbdSHong Zhang } 149265b80a83SHong Zhang #endif 14938baccfbdSHong Zhang 1494af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 149586aefd0dSHong Zhang { 149686aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 149786aefd0dSHong Zhang PetscErrorCode ierr; 149886aefd0dSHong Zhang 149986aefd0dSHong Zhang PetscFunctionBegin; 150086aefd0dSHong Zhang ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 150186aefd0dSHong Zhang PetscFunctionReturn(0); 150286aefd0dSHong Zhang } 150386aefd0dSHong Zhang 1504af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 150586aefd0dSHong Zhang { 150686aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 150786aefd0dSHong Zhang PetscErrorCode ierr; 150886aefd0dSHong Zhang 150986aefd0dSHong Zhang PetscFunctionBegin; 151086aefd0dSHong Zhang ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 151186aefd0dSHong Zhang PetscFunctionReturn(0); 151286aefd0dSHong Zhang } 151386aefd0dSHong Zhang 151494e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 151594e2cb23SJakub Kruzik { 151694e2cb23SJakub Kruzik PetscErrorCode ierr; 151794e2cb23SJakub Kruzik Mat_MPIDense *mat; 151894e2cb23SJakub Kruzik PetscInt m,nloc,N; 151994e2cb23SJakub Kruzik 152094e2cb23SJakub Kruzik PetscFunctionBegin; 152194e2cb23SJakub Kruzik ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 152294e2cb23SJakub Kruzik ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 152394e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 152494e2cb23SJakub Kruzik PetscInt sum; 152594e2cb23SJakub Kruzik 152694e2cb23SJakub Kruzik if (n == PETSC_DECIDE) { 152794e2cb23SJakub Kruzik ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 152894e2cb23SJakub Kruzik } 152994e2cb23SJakub Kruzik /* Check sum(n) = N */ 153094e2cb23SJakub Kruzik ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 153194e2cb23SJakub Kruzik if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 153294e2cb23SJakub Kruzik 153394e2cb23SJakub Kruzik ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 153494e2cb23SJakub Kruzik } 153594e2cb23SJakub Kruzik 153694e2cb23SJakub Kruzik /* numeric phase */ 153794e2cb23SJakub Kruzik mat = (Mat_MPIDense*)(*outmat)->data; 153894e2cb23SJakub Kruzik ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 153994e2cb23SJakub Kruzik ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154094e2cb23SJakub Kruzik ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154194e2cb23SJakub Kruzik PetscFunctionReturn(0); 154294e2cb23SJakub Kruzik } 154394e2cb23SJakub Kruzik 1544637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1545637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1546637a0070SStefano Zampini { 1547637a0070SStefano Zampini Mat B; 1548637a0070SStefano Zampini Mat_MPIDense *m; 1549637a0070SStefano Zampini PetscErrorCode ierr; 1550637a0070SStefano Zampini 1551637a0070SStefano Zampini PetscFunctionBegin; 1552637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1553637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1554637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1555637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1556637a0070SStefano Zampini } 1557637a0070SStefano Zampini 1558637a0070SStefano Zampini B = *newmat; 1559637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); 1560637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1561637a0070SStefano Zampini ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr); 1562637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr); 1563637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr); 1564637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr); 1565637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr); 1566637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr); 1567637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr); 1568637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr); 1569637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr); 1570637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr); 1571637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr); 1572637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr); 1573637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr); 1574d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr); 1575637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1576637a0070SStefano Zampini if (m->A) { 1577637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1578637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1579637a0070SStefano Zampini } 1580637a0070SStefano Zampini B->ops->bindtocpu = NULL; 1581637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_CPU; 1582637a0070SStefano Zampini PetscFunctionReturn(0); 1583637a0070SStefano Zampini } 1584637a0070SStefano Zampini 1585637a0070SStefano Zampini PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1586637a0070SStefano Zampini { 1587637a0070SStefano Zampini Mat B; 1588637a0070SStefano Zampini Mat_MPIDense *m; 1589637a0070SStefano Zampini PetscErrorCode ierr; 1590637a0070SStefano Zampini 1591637a0070SStefano Zampini PetscFunctionBegin; 1592637a0070SStefano Zampini if (reuse == MAT_INITIAL_MATRIX) { 1593637a0070SStefano Zampini ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 1594637a0070SStefano Zampini } else if (reuse == MAT_REUSE_MATRIX) { 1595637a0070SStefano Zampini ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1596637a0070SStefano Zampini } 1597637a0070SStefano Zampini 1598637a0070SStefano Zampini B = *newmat; 1599637a0070SStefano Zampini ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 1600637a0070SStefano Zampini ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr); 1601637a0070SStefano Zampini ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr); 1602637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr); 1603637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 1604637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1605637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr); 1606637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1607637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1608637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1609637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1610637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1611637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1612637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1613d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1614637a0070SStefano Zampini m = (Mat_MPIDense*)(B)->data; 1615637a0070SStefano Zampini if (m->A) { 1616637a0070SStefano Zampini ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1617637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1618637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_BOTH; 1619637a0070SStefano Zampini } else { 1620637a0070SStefano Zampini B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1621637a0070SStefano Zampini } 1622637a0070SStefano Zampini ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1623637a0070SStefano Zampini 1624637a0070SStefano Zampini B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1625637a0070SStefano Zampini PetscFunctionReturn(0); 1626637a0070SStefano Zampini } 1627637a0070SStefano Zampini #endif 1628637a0070SStefano Zampini 16296947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 16306947451fSStefano Zampini { 16316947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16326947451fSStefano Zampini PetscErrorCode ierr; 16336947451fSStefano Zampini PetscInt lda; 16346947451fSStefano Zampini 16356947451fSStefano Zampini PetscFunctionBegin; 16366947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 16376947451fSStefano Zampini if (!a->cvec) { 16386947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 16396947451fSStefano Zampini } 16406947451fSStefano Zampini a->vecinuse = col + 1; 16416947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 16426947451fSStefano Zampini ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 16436947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 16446947451fSStefano Zampini *v = a->cvec; 16456947451fSStefano Zampini PetscFunctionReturn(0); 16466947451fSStefano Zampini } 16476947451fSStefano Zampini 16486947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 16496947451fSStefano Zampini { 16506947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16516947451fSStefano Zampini PetscErrorCode ierr; 16526947451fSStefano Zampini 16536947451fSStefano Zampini PetscFunctionBegin; 16546947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 16556947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 16566947451fSStefano Zampini a->vecinuse = 0; 16576947451fSStefano Zampini ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 16586947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 16596947451fSStefano Zampini *v = NULL; 16606947451fSStefano Zampini PetscFunctionReturn(0); 16616947451fSStefano Zampini } 16626947451fSStefano Zampini 16636947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 16646947451fSStefano Zampini { 16656947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16666947451fSStefano Zampini PetscErrorCode ierr; 16676947451fSStefano Zampini PetscInt lda; 16686947451fSStefano Zampini 16696947451fSStefano Zampini PetscFunctionBegin; 16706947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 16716947451fSStefano Zampini if (!a->cvec) { 16726947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 16736947451fSStefano Zampini } 16746947451fSStefano Zampini a->vecinuse = col + 1; 16756947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 16766947451fSStefano Zampini ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 16776947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 16786947451fSStefano Zampini ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); 16796947451fSStefano Zampini *v = a->cvec; 16806947451fSStefano Zampini PetscFunctionReturn(0); 16816947451fSStefano Zampini } 16826947451fSStefano Zampini 16836947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 16846947451fSStefano Zampini { 16856947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 16866947451fSStefano Zampini PetscErrorCode ierr; 16876947451fSStefano Zampini 16886947451fSStefano Zampini PetscFunctionBegin; 16896947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 16906947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 16916947451fSStefano Zampini a->vecinuse = 0; 16926947451fSStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr); 16936947451fSStefano Zampini ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr); 16946947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 16956947451fSStefano Zampini *v = NULL; 16966947451fSStefano Zampini PetscFunctionReturn(0); 16976947451fSStefano Zampini } 16986947451fSStefano Zampini 16996947451fSStefano Zampini PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17006947451fSStefano Zampini { 17016947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17026947451fSStefano Zampini PetscErrorCode ierr; 17036947451fSStefano Zampini PetscInt lda; 17046947451fSStefano Zampini 17056947451fSStefano Zampini PetscFunctionBegin; 17066947451fSStefano Zampini if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first"); 17076947451fSStefano Zampini if (!a->cvec) { 17086947451fSStefano Zampini ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr); 17096947451fSStefano Zampini } 17106947451fSStefano Zampini a->vecinuse = col + 1; 17116947451fSStefano Zampini ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 17126947451fSStefano Zampini ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17136947451fSStefano Zampini ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr); 17146947451fSStefano Zampini *v = a->cvec; 17156947451fSStefano Zampini PetscFunctionReturn(0); 17166947451fSStefano Zampini } 17176947451fSStefano Zampini 17186947451fSStefano Zampini PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 17196947451fSStefano Zampini { 17206947451fSStefano Zampini Mat_MPIDense *a = (Mat_MPIDense*)A->data; 17216947451fSStefano Zampini PetscErrorCode ierr; 17226947451fSStefano Zampini 17236947451fSStefano Zampini PetscFunctionBegin; 17246947451fSStefano Zampini if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first"); 17256947451fSStefano Zampini if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 17266947451fSStefano Zampini a->vecinuse = 0; 17276947451fSStefano Zampini ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr); 17286947451fSStefano Zampini ierr = VecResetArray(a->cvec);CHKERRQ(ierr); 17296947451fSStefano Zampini *v = NULL; 17306947451fSStefano Zampini PetscFunctionReturn(0); 17316947451fSStefano Zampini } 17326947451fSStefano Zampini 17338cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1734273d9f13SBarry Smith { 1735273d9f13SBarry Smith Mat_MPIDense *a; 1736dfbe8321SBarry Smith PetscErrorCode ierr; 1737273d9f13SBarry Smith 1738273d9f13SBarry Smith PetscFunctionBegin; 1739b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1740b0a32e0cSBarry Smith mat->data = (void*)a; 1741273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1742273d9f13SBarry Smith 1743273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1744ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1745ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1746273d9f13SBarry Smith 1747273d9f13SBarry Smith /* build cache for off array entries formed */ 1748273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 17492205254eSKarl Rupp 1750ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1751273d9f13SBarry Smith 1752273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1753273d9f13SBarry Smith a->lvec = 0; 1754273d9f13SBarry Smith a->Mvctx = 0; 1755273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1756273d9f13SBarry Smith 175749a6ff4bSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr); 1758bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 17598572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 17608572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 17618572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 17626947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr); 17636947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr); 1764d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1765d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 1766d5ea218eSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr); 17676947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr); 17686947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr); 17696947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr); 17706947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr); 17716947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr); 17726947451fSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr); 17738baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 17748baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 17758baccfbdSHong Zhang #endif 1776637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1777637a0070SStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr); 1778637a0070SStefano Zampini #endif 1779bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 17804222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr); 17814222ddf1SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr); 1782bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1783bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 178452c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr); 178552c5f739Sprj- ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr); 17868949adfdSHong Zhang 17878949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 17888949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1789af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1790af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 179138aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1792273d9f13SBarry Smith PetscFunctionReturn(0); 1793273d9f13SBarry Smith } 1794273d9f13SBarry Smith 1795209238afSKris Buschelman /*MC 1796637a0070SStefano Zampini MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1797637a0070SStefano Zampini 1798637a0070SStefano Zampini Options Database Keys: 1799637a0070SStefano Zampini . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1800637a0070SStefano Zampini 1801637a0070SStefano Zampini Level: beginner 1802637a0070SStefano Zampini 1803637a0070SStefano Zampini .seealso: 1804637a0070SStefano Zampini 1805637a0070SStefano Zampini M*/ 1806637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1807637a0070SStefano Zampini PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1808637a0070SStefano Zampini { 1809637a0070SStefano Zampini PetscErrorCode ierr; 1810637a0070SStefano Zampini 1811637a0070SStefano Zampini PetscFunctionBegin; 1812637a0070SStefano Zampini ierr = MatCreate_MPIDense(B);CHKERRQ(ierr); 1813637a0070SStefano Zampini ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 1814637a0070SStefano Zampini PetscFunctionReturn(0); 1815637a0070SStefano Zampini } 1816637a0070SStefano Zampini #endif 1817637a0070SStefano Zampini 1818637a0070SStefano Zampini /*MC 1819002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1820209238afSKris Buschelman 1821209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1822209238afSKris Buschelman and MATMPIDENSE otherwise. 1823209238afSKris Buschelman 1824209238afSKris Buschelman Options Database Keys: 1825209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1826209238afSKris Buschelman 1827209238afSKris Buschelman Level: beginner 1828209238afSKris Buschelman 182901b82886SBarry Smith 18306947451fSStefano Zampini .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 18316947451fSStefano Zampini M*/ 18326947451fSStefano Zampini 18336947451fSStefano Zampini /*MC 18346947451fSStefano Zampini MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 18356947451fSStefano Zampini 18366947451fSStefano Zampini This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 18376947451fSStefano Zampini and MATMPIDENSECUDA otherwise. 18386947451fSStefano Zampini 18396947451fSStefano Zampini Options Database Keys: 18406947451fSStefano Zampini . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 18416947451fSStefano Zampini 18426947451fSStefano Zampini Level: beginner 18436947451fSStefano Zampini 18446947451fSStefano Zampini .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1845209238afSKris Buschelman M*/ 1846209238afSKris Buschelman 1847273d9f13SBarry Smith /*@C 1848273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1849273d9f13SBarry Smith 1850273d9f13SBarry Smith Not collective 1851273d9f13SBarry Smith 1852273d9f13SBarry Smith Input Parameters: 18531c4f3114SJed Brown . B - the matrix 18540298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1855273d9f13SBarry Smith to control all matrix memory allocation. 1856273d9f13SBarry Smith 1857273d9f13SBarry Smith Notes: 1858273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1859273d9f13SBarry Smith storage by columns. 1860273d9f13SBarry Smith 1861273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1862273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 18630298fd71SBarry Smith set data=NULL. 1864273d9f13SBarry Smith 1865273d9f13SBarry Smith Level: intermediate 1866273d9f13SBarry Smith 1867273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1868273d9f13SBarry Smith @*/ 18691c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1870273d9f13SBarry Smith { 18714ac538c5SBarry Smith PetscErrorCode ierr; 1872273d9f13SBarry Smith 1873273d9f13SBarry Smith PetscFunctionBegin; 1874d5ea218eSStefano Zampini PetscValidHeaderSpecific(B,MAT_CLASSID,1); 18751c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1876273d9f13SBarry Smith PetscFunctionReturn(0); 1877273d9f13SBarry Smith } 1878273d9f13SBarry Smith 1879d3042a70SBarry Smith /*@ 1880637a0070SStefano Zampini MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1881d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1882d3042a70SBarry Smith into a matrix 1883d3042a70SBarry Smith 1884d3042a70SBarry Smith Not Collective 1885d3042a70SBarry Smith 1886d3042a70SBarry Smith Input Parameters: 1887d3042a70SBarry Smith + mat - the matrix 1888d3042a70SBarry Smith - array - the array in column major order 1889d3042a70SBarry Smith 1890d3042a70SBarry Smith Notes: 1891d3042a70SBarry Smith You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1892d3042a70SBarry Smith freed when the matrix is destroyed. 1893d3042a70SBarry Smith 1894d3042a70SBarry Smith Level: developer 1895d3042a70SBarry Smith 1896d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1897d3042a70SBarry Smith 1898d3042a70SBarry Smith @*/ 1899637a0070SStefano Zampini PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 1900d3042a70SBarry Smith { 1901d3042a70SBarry Smith PetscErrorCode ierr; 1902637a0070SStefano Zampini 1903d3042a70SBarry Smith PetscFunctionBegin; 1904d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1905d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1906d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1907637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 1908637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1909637a0070SStefano Zampini #endif 1910d3042a70SBarry Smith PetscFunctionReturn(0); 1911d3042a70SBarry Smith } 1912d3042a70SBarry Smith 1913d3042a70SBarry Smith /*@ 1914d3042a70SBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1915d3042a70SBarry Smith 1916d3042a70SBarry Smith Not Collective 1917d3042a70SBarry Smith 1918d3042a70SBarry Smith Input Parameters: 1919d3042a70SBarry Smith . mat - the matrix 1920d3042a70SBarry Smith 1921d3042a70SBarry Smith Notes: 1922d3042a70SBarry Smith You can only call this after a call to MatDensePlaceArray() 1923d3042a70SBarry Smith 1924d3042a70SBarry Smith Level: developer 1925d3042a70SBarry Smith 1926d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1927d3042a70SBarry Smith 1928d3042a70SBarry Smith @*/ 1929d3042a70SBarry Smith PetscErrorCode MatDenseResetArray(Mat mat) 1930d3042a70SBarry Smith { 1931d3042a70SBarry Smith PetscErrorCode ierr; 1932637a0070SStefano Zampini 1933d3042a70SBarry Smith PetscFunctionBegin; 1934d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1935d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1936d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1937d3042a70SBarry Smith PetscFunctionReturn(0); 1938d3042a70SBarry Smith } 1939d3042a70SBarry Smith 1940d5ea218eSStefano Zampini /*@ 1941d5ea218eSStefano Zampini MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 1942d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 1943d5ea218eSStefano Zampini into a matrix 1944d5ea218eSStefano Zampini 1945d5ea218eSStefano Zampini Not Collective 1946d5ea218eSStefano Zampini 1947d5ea218eSStefano Zampini Input Parameters: 1948d5ea218eSStefano Zampini + mat - the matrix 1949d5ea218eSStefano Zampini - array - the array in column major order 1950d5ea218eSStefano Zampini 1951d5ea218eSStefano Zampini Notes: 1952d5ea218eSStefano Zampini The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 1953d5ea218eSStefano Zampini freed by the user. It will be freed when the matrix is destroyed. 1954d5ea218eSStefano Zampini 1955d5ea218eSStefano Zampini Level: developer 1956d5ea218eSStefano Zampini 1957d5ea218eSStefano Zampini .seealso: MatDenseGetArray(), VecReplaceArray() 1958d5ea218eSStefano Zampini @*/ 1959d5ea218eSStefano Zampini PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 1960d5ea218eSStefano Zampini { 1961d5ea218eSStefano Zampini PetscErrorCode ierr; 1962d5ea218eSStefano Zampini 1963d5ea218eSStefano Zampini PetscFunctionBegin; 1964d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1965d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1966d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1967d5ea218eSStefano Zampini #if defined(PETSC_HAVE_CUDA) 1968d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_CPU; 1969d5ea218eSStefano Zampini #endif 1970d5ea218eSStefano Zampini PetscFunctionReturn(0); 1971d5ea218eSStefano Zampini } 1972d5ea218eSStefano Zampini 1973637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 19748965ea79SLois Curfman McInnes /*@C 1975637a0070SStefano Zampini MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 1976637a0070SStefano Zampini array provided by the user. This is useful to avoid copying an array 1977637a0070SStefano Zampini into a matrix 1978637a0070SStefano Zampini 1979637a0070SStefano Zampini Not Collective 1980637a0070SStefano Zampini 1981637a0070SStefano Zampini Input Parameters: 1982637a0070SStefano Zampini + mat - the matrix 1983637a0070SStefano Zampini - array - the array in column major order 1984637a0070SStefano Zampini 1985637a0070SStefano Zampini Notes: 1986637a0070SStefano Zampini You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 1987637a0070SStefano Zampini freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 1988637a0070SStefano Zampini 1989637a0070SStefano Zampini Level: developer 1990637a0070SStefano Zampini 1991637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 1992637a0070SStefano Zampini @*/ 1993637a0070SStefano Zampini PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 1994637a0070SStefano Zampini { 1995637a0070SStefano Zampini PetscErrorCode ierr; 1996637a0070SStefano Zampini 1997637a0070SStefano Zampini PetscFunctionBegin; 1998d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 1999637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2000637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2001637a0070SStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2002637a0070SStefano Zampini PetscFunctionReturn(0); 2003637a0070SStefano Zampini } 2004637a0070SStefano Zampini 2005637a0070SStefano Zampini /*@C 2006637a0070SStefano Zampini MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2007637a0070SStefano Zampini 2008637a0070SStefano Zampini Not Collective 2009637a0070SStefano Zampini 2010637a0070SStefano Zampini Input Parameters: 2011637a0070SStefano Zampini . mat - the matrix 2012637a0070SStefano Zampini 2013637a0070SStefano Zampini Notes: 2014637a0070SStefano Zampini You can only call this after a call to MatDenseCUDAPlaceArray() 2015637a0070SStefano Zampini 2016637a0070SStefano Zampini Level: developer 2017637a0070SStefano Zampini 2018637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2019637a0070SStefano Zampini 2020637a0070SStefano Zampini @*/ 2021637a0070SStefano Zampini PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2022637a0070SStefano Zampini { 2023637a0070SStefano Zampini PetscErrorCode ierr; 2024637a0070SStefano Zampini 2025637a0070SStefano Zampini PetscFunctionBegin; 2026d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2027637a0070SStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr); 2028637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2029637a0070SStefano Zampini PetscFunctionReturn(0); 2030637a0070SStefano Zampini } 2031637a0070SStefano Zampini 2032637a0070SStefano Zampini /*@C 2033d5ea218eSStefano Zampini MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2034d5ea218eSStefano Zampini array provided by the user. This is useful to avoid copying an array 2035d5ea218eSStefano Zampini into a matrix 2036d5ea218eSStefano Zampini 2037d5ea218eSStefano Zampini Not Collective 2038d5ea218eSStefano Zampini 2039d5ea218eSStefano Zampini Input Parameters: 2040d5ea218eSStefano Zampini + mat - the matrix 2041d5ea218eSStefano Zampini - array - the array in column major order 2042d5ea218eSStefano Zampini 2043d5ea218eSStefano Zampini Notes: 2044d5ea218eSStefano Zampini This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2045d5ea218eSStefano Zampini The memory passed in CANNOT be freed by the user. It will be freed 2046d5ea218eSStefano Zampini when the matrix is destroyed. The array should respect the matrix leading dimension. 2047d5ea218eSStefano Zampini 2048d5ea218eSStefano Zampini Level: developer 2049d5ea218eSStefano Zampini 2050d5ea218eSStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2051d5ea218eSStefano Zampini @*/ 2052d5ea218eSStefano Zampini PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2053d5ea218eSStefano Zampini { 2054d5ea218eSStefano Zampini PetscErrorCode ierr; 2055d5ea218eSStefano Zampini 2056d5ea218eSStefano Zampini PetscFunctionBegin; 2057d5ea218eSStefano Zampini PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2058d5ea218eSStefano Zampini ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2059d5ea218eSStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2060d5ea218eSStefano Zampini mat->offloadmask = PETSC_OFFLOAD_GPU; 2061d5ea218eSStefano Zampini PetscFunctionReturn(0); 2062d5ea218eSStefano Zampini } 2063d5ea218eSStefano Zampini 2064d5ea218eSStefano Zampini /*@C 2065637a0070SStefano Zampini MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2066637a0070SStefano Zampini 2067637a0070SStefano Zampini Not Collective 2068637a0070SStefano Zampini 2069637a0070SStefano Zampini Input Parameters: 2070637a0070SStefano Zampini . A - the matrix 2071637a0070SStefano Zampini 2072637a0070SStefano Zampini Output Parameters 2073637a0070SStefano Zampini . array - the GPU array in column major order 2074637a0070SStefano Zampini 2075637a0070SStefano Zampini Notes: 2076637a0070SStefano Zampini The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2077637a0070SStefano Zampini 2078637a0070SStefano Zampini Level: developer 2079637a0070SStefano Zampini 2080637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2081637a0070SStefano Zampini @*/ 2082637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2083637a0070SStefano Zampini { 2084637a0070SStefano Zampini PetscErrorCode ierr; 2085637a0070SStefano Zampini 2086637a0070SStefano Zampini PetscFunctionBegin; 2087d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2088637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2089637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2090637a0070SStefano Zampini PetscFunctionReturn(0); 2091637a0070SStefano Zampini } 2092637a0070SStefano Zampini 2093637a0070SStefano Zampini /*@C 2094637a0070SStefano Zampini MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2095637a0070SStefano Zampini 2096637a0070SStefano Zampini Not Collective 2097637a0070SStefano Zampini 2098637a0070SStefano Zampini Input Parameters: 2099637a0070SStefano Zampini + A - the matrix 2100637a0070SStefano Zampini - array - the GPU array in column major order 2101637a0070SStefano Zampini 2102637a0070SStefano Zampini Notes: 2103637a0070SStefano Zampini 2104637a0070SStefano Zampini Level: developer 2105637a0070SStefano Zampini 2106637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2107637a0070SStefano Zampini @*/ 2108637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2109637a0070SStefano Zampini { 2110637a0070SStefano Zampini PetscErrorCode ierr; 2111637a0070SStefano Zampini 2112637a0070SStefano Zampini PetscFunctionBegin; 2113d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2114637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2115637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2116637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2117637a0070SStefano Zampini PetscFunctionReturn(0); 2118637a0070SStefano Zampini } 2119637a0070SStefano Zampini 2120637a0070SStefano Zampini /*@C 2121637a0070SStefano Zampini MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2122637a0070SStefano Zampini 2123637a0070SStefano Zampini Not Collective 2124637a0070SStefano Zampini 2125637a0070SStefano Zampini Input Parameters: 2126637a0070SStefano Zampini . A - the matrix 2127637a0070SStefano Zampini 2128637a0070SStefano Zampini Output Parameters 2129637a0070SStefano Zampini . array - the GPU array in column major order 2130637a0070SStefano Zampini 2131637a0070SStefano Zampini Notes: 2132637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2133637a0070SStefano Zampini 2134637a0070SStefano Zampini Level: developer 2135637a0070SStefano Zampini 2136637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2137637a0070SStefano Zampini @*/ 2138637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2139637a0070SStefano Zampini { 2140637a0070SStefano Zampini PetscErrorCode ierr; 2141637a0070SStefano Zampini 2142637a0070SStefano Zampini PetscFunctionBegin; 2143d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2144637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2145637a0070SStefano Zampini PetscFunctionReturn(0); 2146637a0070SStefano Zampini } 2147637a0070SStefano Zampini 2148637a0070SStefano Zampini /*@C 2149637a0070SStefano Zampini MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2150637a0070SStefano Zampini 2151637a0070SStefano Zampini Not Collective 2152637a0070SStefano Zampini 2153637a0070SStefano Zampini Input Parameters: 2154637a0070SStefano Zampini + A - the matrix 2155637a0070SStefano Zampini - array - the GPU array in column major order 2156637a0070SStefano Zampini 2157637a0070SStefano Zampini Notes: 2158637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2159637a0070SStefano Zampini 2160637a0070SStefano Zampini Level: developer 2161637a0070SStefano Zampini 2162637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2163637a0070SStefano Zampini @*/ 2164637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2165637a0070SStefano Zampini { 2166637a0070SStefano Zampini PetscErrorCode ierr; 2167637a0070SStefano Zampini 2168637a0070SStefano Zampini PetscFunctionBegin; 2169637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2170637a0070SStefano Zampini PetscFunctionReturn(0); 2171637a0070SStefano Zampini } 2172637a0070SStefano Zampini 2173637a0070SStefano Zampini /*@C 2174637a0070SStefano Zampini MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2175637a0070SStefano Zampini 2176637a0070SStefano Zampini Not Collective 2177637a0070SStefano Zampini 2178637a0070SStefano Zampini Input Parameters: 2179637a0070SStefano Zampini . A - the matrix 2180637a0070SStefano Zampini 2181637a0070SStefano Zampini Output Parameters 2182637a0070SStefano Zampini . array - the GPU array in column major order 2183637a0070SStefano Zampini 2184637a0070SStefano Zampini Notes: 2185637a0070SStefano Zampini Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2186637a0070SStefano Zampini 2187637a0070SStefano Zampini Level: developer 2188637a0070SStefano Zampini 2189637a0070SStefano Zampini .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2190637a0070SStefano Zampini @*/ 2191637a0070SStefano Zampini PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2192637a0070SStefano Zampini { 2193637a0070SStefano Zampini PetscErrorCode ierr; 2194637a0070SStefano Zampini 2195637a0070SStefano Zampini PetscFunctionBegin; 2196d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2197637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2198637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2199637a0070SStefano Zampini PetscFunctionReturn(0); 2200637a0070SStefano Zampini } 2201637a0070SStefano Zampini 2202637a0070SStefano Zampini /*@C 2203637a0070SStefano Zampini MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2204637a0070SStefano Zampini 2205637a0070SStefano Zampini Not Collective 2206637a0070SStefano Zampini 2207637a0070SStefano Zampini Input Parameters: 2208637a0070SStefano Zampini + A - the matrix 2209637a0070SStefano Zampini - array - the GPU array in column major order 2210637a0070SStefano Zampini 2211637a0070SStefano Zampini Notes: 2212637a0070SStefano Zampini 2213637a0070SStefano Zampini Level: developer 2214637a0070SStefano Zampini 2215637a0070SStefano Zampini .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2216637a0070SStefano Zampini @*/ 2217637a0070SStefano Zampini PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2218637a0070SStefano Zampini { 2219637a0070SStefano Zampini PetscErrorCode ierr; 2220637a0070SStefano Zampini 2221637a0070SStefano Zampini PetscFunctionBegin; 2222d5ea218eSStefano Zampini PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2223637a0070SStefano Zampini ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2224637a0070SStefano Zampini ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2225637a0070SStefano Zampini A->offloadmask = PETSC_OFFLOAD_GPU; 2226637a0070SStefano Zampini PetscFunctionReturn(0); 2227637a0070SStefano Zampini } 2228637a0070SStefano Zampini #endif 2229637a0070SStefano Zampini 2230637a0070SStefano Zampini /*@C 2231637a0070SStefano Zampini MatCreateDense - Creates a matrix in dense format. 22328965ea79SLois Curfman McInnes 2233d083f849SBarry Smith Collective 2234db81eaa0SLois Curfman McInnes 22358965ea79SLois Curfman McInnes Input Parameters: 2236db81eaa0SLois Curfman McInnes + comm - MPI communicator 22378965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2238db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 22398965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2240db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 22416cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2242dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 22438965ea79SLois Curfman McInnes 22448965ea79SLois Curfman McInnes Output Parameter: 2245477f1c0bSLois Curfman McInnes . A - the matrix 22468965ea79SLois Curfman McInnes 2247b259b22eSLois Curfman McInnes Notes: 224839ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 224939ddd567SLois Curfman McInnes storage by columns. 22508965ea79SLois Curfman McInnes 225118f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 225218f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 22536cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 225418f449edSLois Curfman McInnes 22558965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 22568965ea79SLois Curfman McInnes (possibly both). 22578965ea79SLois Curfman McInnes 2258027ccd11SLois Curfman McInnes Level: intermediate 2259027ccd11SLois Curfman McInnes 226039ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 22618965ea79SLois Curfman McInnes @*/ 226269b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 22638965ea79SLois Curfman McInnes { 22646849ba73SBarry Smith PetscErrorCode ierr; 226513f74950SBarry Smith PetscMPIInt size; 22668965ea79SLois Curfman McInnes 22673a40ed3dSBarry Smith PetscFunctionBegin; 2268f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 22698491ab44SLisandro Dalcin PetscValidLogicalCollectiveBool(*A,!!data,6); 2270f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2271273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2272273d9f13SBarry Smith if (size > 1) { 2273273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2274273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 22756cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 22766cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 22776cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 22786cfe35ddSJose E. Roman } 2279273d9f13SBarry Smith } else { 2280273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2281273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 22828c469469SLois Curfman McInnes } 22833a40ed3dSBarry Smith PetscFunctionReturn(0); 22848965ea79SLois Curfman McInnes } 22858965ea79SLois Curfman McInnes 2286637a0070SStefano Zampini #if defined(PETSC_HAVE_CUDA) 2287637a0070SStefano Zampini /*@C 2288637a0070SStefano Zampini MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2289637a0070SStefano Zampini 2290637a0070SStefano Zampini Collective 2291637a0070SStefano Zampini 2292637a0070SStefano Zampini Input Parameters: 2293637a0070SStefano Zampini + comm - MPI communicator 2294637a0070SStefano Zampini . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2295637a0070SStefano Zampini . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2296637a0070SStefano Zampini . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2297637a0070SStefano Zampini . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2298637a0070SStefano Zampini - data - optional location of GPU matrix data. Set data=NULL for PETSc 2299637a0070SStefano Zampini to control matrix memory allocation. 2300637a0070SStefano Zampini 2301637a0070SStefano Zampini Output Parameter: 2302637a0070SStefano Zampini . A - the matrix 2303637a0070SStefano Zampini 2304637a0070SStefano Zampini Notes: 2305637a0070SStefano Zampini 2306637a0070SStefano Zampini Level: intermediate 2307637a0070SStefano Zampini 2308637a0070SStefano Zampini .seealso: MatCreate(), MatCreateDense() 2309637a0070SStefano Zampini @*/ 2310637a0070SStefano Zampini PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2311637a0070SStefano Zampini { 2312637a0070SStefano Zampini PetscErrorCode ierr; 2313637a0070SStefano Zampini PetscMPIInt size; 2314637a0070SStefano Zampini 2315637a0070SStefano Zampini PetscFunctionBegin; 2316637a0070SStefano Zampini ierr = MatCreate(comm,A);CHKERRQ(ierr); 2317637a0070SStefano Zampini PetscValidLogicalCollectiveBool(*A,!!data,6); 2318637a0070SStefano Zampini ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2319637a0070SStefano Zampini ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2320637a0070SStefano Zampini if (size > 1) { 2321637a0070SStefano Zampini ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2322637a0070SStefano Zampini ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2323637a0070SStefano Zampini if (data) { /* user provided data array, so no need to assemble */ 2324637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2325637a0070SStefano Zampini (*A)->assembled = PETSC_TRUE; 2326637a0070SStefano Zampini } 2327637a0070SStefano Zampini } else { 2328637a0070SStefano Zampini ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2329637a0070SStefano Zampini ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2330637a0070SStefano Zampini } 2331637a0070SStefano Zampini PetscFunctionReturn(0); 2332637a0070SStefano Zampini } 2333637a0070SStefano Zampini #endif 2334637a0070SStefano Zampini 23356849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 23368965ea79SLois Curfman McInnes { 23378965ea79SLois Curfman McInnes Mat mat; 23383501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2339dfbe8321SBarry Smith PetscErrorCode ierr; 23408965ea79SLois Curfman McInnes 23413a40ed3dSBarry Smith PetscFunctionBegin; 23428965ea79SLois Curfman McInnes *newmat = 0; 2343ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2344d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 23457adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2346834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 23475aa7edbeSHong Zhang 2348d5f3da31SBarry Smith mat->factortype = A->factortype; 2349c456f294SBarry Smith mat->assembled = PETSC_TRUE; 2350273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 23518965ea79SLois Curfman McInnes 23528965ea79SLois Curfman McInnes a->size = oldmat->size; 23538965ea79SLois Curfman McInnes a->rank = oldmat->rank; 2354e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 23553782ba37SSatish Balay a->donotstash = oldmat->donotstash; 2356e04c1aa4SHong Zhang 23571e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 23581e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 23598965ea79SLois Curfman McInnes 23605609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 23613bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2362637a0070SStefano Zampini ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 236301b82886SBarry Smith 23648965ea79SLois Curfman McInnes *newmat = mat; 23653a40ed3dSBarry Smith PetscFunctionReturn(0); 23668965ea79SLois Curfman McInnes } 23678965ea79SLois Curfman McInnes 2368eb91f321SVaclav Hapla PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2369eb91f321SVaclav Hapla { 2370eb91f321SVaclav Hapla PetscErrorCode ierr; 237187d5ce66SSatish Balay PetscBool isbinary; 237287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 237387d5ce66SSatish Balay PetscBool ishdf5; 237487d5ce66SSatish Balay #endif 2375eb91f321SVaclav Hapla 2376eb91f321SVaclav Hapla PetscFunctionBegin; 2377eb91f321SVaclav Hapla PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2378eb91f321SVaclav Hapla PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2379eb91f321SVaclav Hapla /* force binary viewer to load .info file if it has not yet done so */ 2380eb91f321SVaclav Hapla ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2381eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 238287d5ce66SSatish Balay #if defined(PETSC_HAVE_HDF5) 2383eb91f321SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 238487d5ce66SSatish Balay #endif 2385eb91f321SVaclav Hapla if (isbinary) { 23868491ab44SLisandro Dalcin ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr); 2387eb91f321SVaclav Hapla #if defined(PETSC_HAVE_HDF5) 238887d5ce66SSatish Balay } else if (ishdf5) { 2389eb91f321SVaclav Hapla ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr); 2390eb91f321SVaclav Hapla #endif 239187d5ce66SSatish Balay } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2392eb91f321SVaclav Hapla PetscFunctionReturn(0); 2393eb91f321SVaclav Hapla } 2394eb91f321SVaclav Hapla 2395ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 23966e4ee0c6SHong Zhang { 23976e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 23986e4ee0c6SHong Zhang Mat a,b; 2399ace3abfcSBarry Smith PetscBool flg; 24006e4ee0c6SHong Zhang PetscErrorCode ierr; 240190ace30eSBarry Smith 24026e4ee0c6SHong Zhang PetscFunctionBegin; 24036e4ee0c6SHong Zhang a = matA->A; 24046e4ee0c6SHong Zhang b = matB->A; 24056e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 2406b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 24076e4ee0c6SHong Zhang PetscFunctionReturn(0); 24086e4ee0c6SHong Zhang } 240990ace30eSBarry Smith 2410baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 2411baa3c1c6SHong Zhang { 2412baa3c1c6SHong Zhang PetscErrorCode ierr; 2413baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2414baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = a->atbdense; 2415baa3c1c6SHong Zhang 2416baa3c1c6SHong Zhang PetscFunctionBegin; 2417637a0070SStefano Zampini ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr); 2418637a0070SStefano Zampini ierr = MatDestroy(&atb->atb);CHKERRQ(ierr); 2419637a0070SStefano Zampini ierr = (*atb->destroy)(A);CHKERRQ(ierr); 2420baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 2421baa3c1c6SHong Zhang PetscFunctionReturn(0); 2422baa3c1c6SHong Zhang } 2423baa3c1c6SHong Zhang 2424cc48ffa7SToby Isaac PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 2425cc48ffa7SToby Isaac { 2426cc48ffa7SToby Isaac PetscErrorCode ierr; 2427cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2428cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = a->abtdense; 2429cc48ffa7SToby Isaac 2430cc48ffa7SToby Isaac PetscFunctionBegin; 2431cc48ffa7SToby Isaac ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 2432faa55883SToby Isaac ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 2433cc48ffa7SToby Isaac ierr = (abt->destroy)(A);CHKERRQ(ierr); 2434cc48ffa7SToby Isaac ierr = PetscFree(abt);CHKERRQ(ierr); 2435cc48ffa7SToby Isaac PetscFunctionReturn(0); 2436cc48ffa7SToby Isaac } 2437cc48ffa7SToby Isaac 2438cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2439cb20be35SHong Zhang { 2440baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2441baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = c->atbdense; 2442cb20be35SHong Zhang PetscErrorCode ierr; 2443cb20be35SHong Zhang MPI_Comm comm; 2444637a0070SStefano Zampini PetscMPIInt size,*recvcounts=atb->recvcounts; 2445637a0070SStefano Zampini PetscScalar *carray,*sendbuf=atb->sendbuf; 2446637a0070SStefano Zampini const PetscScalar *atbarray; 2447d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 2448e68c0b26SHong Zhang const PetscInt *ranges; 2449cb20be35SHong Zhang 2450cb20be35SHong Zhang PetscFunctionBegin; 2451cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2452cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2453e68c0b26SHong Zhang 2454c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 2455637a0070SStefano Zampini ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr); 2456cb20be35SHong Zhang 2457cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 2458c5ef1628SHong Zhang for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2459cb20be35SHong Zhang 2460660d5466SHong Zhang /* arrange atbarray into sendbuf */ 2461637a0070SStefano Zampini ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2462637a0070SStefano Zampini for (proc=0, k=0; proc<size; proc++) { 2463baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 2464c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 2465cb20be35SHong Zhang } 2466cb20be35SHong Zhang } 2467637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr); 2468637a0070SStefano Zampini 2469c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 2470660d5466SHong Zhang ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 24713462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 2472660d5466SHong Zhang ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 2473cb20be35SHong Zhang PetscFunctionReturn(0); 2474cb20be35SHong Zhang } 2475cb20be35SHong Zhang 24764222ddf1SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2477cb20be35SHong Zhang { 2478cb20be35SHong Zhang PetscErrorCode ierr; 2479cb20be35SHong Zhang MPI_Comm comm; 2480baa3c1c6SHong Zhang PetscMPIInt size; 2481660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2482baa3c1c6SHong Zhang Mat_MPIDense *c; 2483baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 2484cb20be35SHong Zhang 2485cb20be35SHong Zhang PetscFunctionBegin; 2486baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2487cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2488cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2489cb20be35SHong Zhang } 2490cb20be35SHong Zhang 24914222ddf1SHong Zhang /* create matrix product C */ 24924222ddf1SHong Zhang ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 24934222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 249418992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 24954222ddf1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 24964222ddf1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497baa3c1c6SHong Zhang 24984222ddf1SHong Zhang /* create data structure for reuse C */ 2499baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2500baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 25014222ddf1SHong Zhang cM = C->rmap->N; 2502637a0070SStefano Zampini ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2503baa3c1c6SHong Zhang 25044222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2505baa3c1c6SHong Zhang c->atbdense = atb; 25064222ddf1SHong Zhang atb->destroy = C->ops->destroy; 25074222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2508cb20be35SHong Zhang PetscFunctionReturn(0); 2509cb20be35SHong Zhang } 2510cb20be35SHong Zhang 25114222ddf1SHong Zhang static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2512cb20be35SHong Zhang { 2513cb20be35SHong Zhang PetscErrorCode ierr; 2514cc48ffa7SToby Isaac MPI_Comm comm; 2515cc48ffa7SToby Isaac PetscMPIInt i, size; 2516cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 2517cc48ffa7SToby Isaac Mat_MPIDense *c; 2518cc48ffa7SToby Isaac PetscMPIInt tag; 25194222ddf1SHong Zhang PetscInt alg; 2520cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 25214222ddf1SHong Zhang Mat_Product *product = C->product; 25224222ddf1SHong Zhang PetscBool flg; 2523cc48ffa7SToby Isaac 2524cc48ffa7SToby Isaac PetscFunctionBegin; 25254222ddf1SHong Zhang /* check local size of A and B */ 2526637a0070SStefano Zampini if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2527cc48ffa7SToby Isaac 25284222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2529637a0070SStefano Zampini alg = flg ? 0 : 1; 2530cc48ffa7SToby Isaac 25314222ddf1SHong Zhang /* setup matrix product C */ 25324222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 25334222ddf1SHong Zhang ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 253418992e5dSStefano Zampini ierr = MatSetUp(C);CHKERRQ(ierr); 25354222ddf1SHong Zhang ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr); 25364222ddf1SHong Zhang ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25374222ddf1SHong Zhang ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 25384222ddf1SHong Zhang 25394222ddf1SHong Zhang /* create data structure for reuse C */ 25404222ddf1SHong Zhang ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2541cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2542cc48ffa7SToby Isaac ierr = PetscNew(&abt);CHKERRQ(ierr); 2543cc48ffa7SToby Isaac abt->tag = tag; 2544faa55883SToby Isaac abt->alg = alg; 2545faa55883SToby Isaac switch (alg) { 25464222ddf1SHong Zhang case 1: /* alg: "cyclic" */ 2547cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2548cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2549cc48ffa7SToby Isaac ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2550faa55883SToby Isaac break; 25514222ddf1SHong Zhang default: /* alg: "allgatherv" */ 2552faa55883SToby Isaac ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2553faa55883SToby Isaac ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2554faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2555faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2556faa55883SToby Isaac break; 2557faa55883SToby Isaac } 2558cc48ffa7SToby Isaac 25594222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2560cc48ffa7SToby Isaac c->abtdense = abt; 25614222ddf1SHong Zhang abt->destroy = C->ops->destroy; 25624222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 25634222ddf1SHong Zhang C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2564cc48ffa7SToby Isaac PetscFunctionReturn(0); 2565cc48ffa7SToby Isaac } 2566cc48ffa7SToby Isaac 2567faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2568cc48ffa7SToby Isaac { 2569cc48ffa7SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2570cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2571cc48ffa7SToby Isaac PetscErrorCode ierr; 2572cc48ffa7SToby Isaac MPI_Comm comm; 2573cc48ffa7SToby Isaac PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2574637a0070SStefano Zampini PetscScalar *sendbuf, *recvbuf=0, *cv; 2575cc48ffa7SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2576cc48ffa7SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2577637a0070SStefano Zampini const PetscScalar *av,*bv; 2578637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, blda, clda; 2579cc48ffa7SToby Isaac MPI_Request reqs[2]; 2580cc48ffa7SToby Isaac const PetscInt *ranges; 2581cc48ffa7SToby Isaac 2582cc48ffa7SToby Isaac PetscFunctionBegin; 2583cc48ffa7SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2584cc48ffa7SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2585cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2586637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2587637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2588637a0070SStefano Zampini ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2589637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2590637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2591637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2592637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2593637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2594637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2595cc48ffa7SToby Isaac ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2596cc48ffa7SToby Isaac bn = B->rmap->n; 2597637a0070SStefano Zampini if (blda == bn) { 2598637a0070SStefano Zampini sendbuf = (PetscScalar*)bv; 2599cc48ffa7SToby Isaac } else { 2600cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2601cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2602cc48ffa7SToby Isaac for (j = 0; j < bn; j++, k++) { 2603637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2604cc48ffa7SToby Isaac } 2605cc48ffa7SToby Isaac } 2606cc48ffa7SToby Isaac } 2607cc48ffa7SToby Isaac if (size > 1) { 2608cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2609cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2610cc48ffa7SToby Isaac } else { 2611cc48ffa7SToby Isaac sendto = recvfrom = 0; 2612cc48ffa7SToby Isaac } 2613cc48ffa7SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2614cc48ffa7SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2615cc48ffa7SToby Isaac recvisfrom = rank; 2616cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2617cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2618cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2619cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2620cc48ffa7SToby Isaac 2621cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2622cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2623cc48ffa7SToby Isaac sendsiz = cK * bn; 2624cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2625cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2626cc48ffa7SToby Isaac ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2627cc48ffa7SToby Isaac ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2628cc48ffa7SToby Isaac } 2629cc48ffa7SToby Isaac 2630cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 2631cc48ffa7SToby Isaac ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 263250ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2633cc48ffa7SToby Isaac 2634cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2635cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2636cc48ffa7SToby Isaac ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2637cc48ffa7SToby Isaac } 2638cc48ffa7SToby Isaac bn = nextbn; 2639cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2640cc48ffa7SToby Isaac sendbuf = recvbuf; 2641cc48ffa7SToby Isaac } 2642637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2643637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2644637a0070SStefano Zampini ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2645cc48ffa7SToby Isaac PetscFunctionReturn(0); 2646cc48ffa7SToby Isaac } 2647cc48ffa7SToby Isaac 2648faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2649faa55883SToby Isaac { 2650faa55883SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2651faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2652faa55883SToby Isaac PetscErrorCode ierr; 2653faa55883SToby Isaac MPI_Comm comm; 2654637a0070SStefano Zampini PetscMPIInt size; 2655637a0070SStefano Zampini PetscScalar *cv, *sendbuf, *recvbuf; 2656637a0070SStefano Zampini const PetscScalar *av,*bv; 2657637a0070SStefano Zampini PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2658faa55883SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2659637a0070SStefano Zampini PetscBLASInt cm, cn, ck, alda, clda; 2660faa55883SToby Isaac 2661faa55883SToby Isaac PetscFunctionBegin; 2662faa55883SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2663faa55883SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2664637a0070SStefano Zampini ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2665637a0070SStefano Zampini ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2666637a0070SStefano Zampini ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2667637a0070SStefano Zampini ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2668637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2669637a0070SStefano Zampini ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2670637a0070SStefano Zampini ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2671637a0070SStefano Zampini ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2672faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2673faa55883SToby Isaac bn = B->rmap->n; 2674faa55883SToby Isaac sendbuf = abt->buf[0]; 2675faa55883SToby Isaac recvbuf = abt->buf[1]; 2676faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2677faa55883SToby Isaac for (i = 0; i < cK; i++, k++) { 2678637a0070SStefano Zampini sendbuf[k] = bv[i * blda + j]; 2679faa55883SToby Isaac } 2680faa55883SToby Isaac } 2681637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2682faa55883SToby Isaac ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2683faa55883SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2684faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2685faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 268650ce3c9cSStefano Zampini if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2687637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2688637a0070SStefano Zampini ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2689637a0070SStefano Zampini ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2690faa55883SToby Isaac PetscFunctionReturn(0); 2691faa55883SToby Isaac } 2692faa55883SToby Isaac 2693faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2694faa55883SToby Isaac { 2695faa55883SToby Isaac Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2696faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2697faa55883SToby Isaac PetscErrorCode ierr; 2698faa55883SToby Isaac 2699faa55883SToby Isaac PetscFunctionBegin; 2700faa55883SToby Isaac switch (abt->alg) { 2701faa55883SToby Isaac case 1: 2702faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2703faa55883SToby Isaac break; 2704faa55883SToby Isaac default: 2705faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2706faa55883SToby Isaac break; 2707faa55883SToby Isaac } 2708faa55883SToby Isaac PetscFunctionReturn(0); 2709faa55883SToby Isaac } 2710faa55883SToby Isaac 2711320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2712320f2790SHong Zhang { 2713320f2790SHong Zhang PetscErrorCode ierr; 2714320f2790SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2715320f2790SHong Zhang Mat_MatMultDense *ab = a->abdense; 2716320f2790SHong Zhang 2717320f2790SHong Zhang PetscFunctionBegin; 2718320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2719320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2720320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2721320f2790SHong Zhang 2722320f2790SHong Zhang ierr = (ab->destroy)(A);CHKERRQ(ierr); 2723320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 2724320f2790SHong Zhang PetscFunctionReturn(0); 2725320f2790SHong Zhang } 2726320f2790SHong Zhang 27275242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 2728320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2729320f2790SHong Zhang { 2730320f2790SHong Zhang PetscErrorCode ierr; 2731320f2790SHong Zhang Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2732320f2790SHong Zhang Mat_MatMultDense *ab=c->abdense; 2733320f2790SHong Zhang 2734320f2790SHong Zhang PetscFunctionBegin; 2735de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2736de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 27374222ddf1SHong Zhang ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2738de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2739320f2790SHong Zhang PetscFunctionReturn(0); 2740320f2790SHong Zhang } 2741320f2790SHong Zhang 27424222ddf1SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2743320f2790SHong Zhang { 2744320f2790SHong Zhang PetscErrorCode ierr; 2745320f2790SHong Zhang Mat Ae,Be,Ce; 2746320f2790SHong Zhang Mat_MPIDense *c; 2747320f2790SHong Zhang Mat_MatMultDense *ab; 2748320f2790SHong Zhang 2749320f2790SHong Zhang PetscFunctionBegin; 27504222ddf1SHong Zhang /* check local size of A and B */ 2751320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2752378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2753320f2790SHong Zhang } 2754320f2790SHong Zhang 27554222ddf1SHong Zhang /* create elemental matrices Ae and Be */ 27564222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 27574222ddf1SHong Zhang ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 27584222ddf1SHong Zhang ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 27594222ddf1SHong Zhang ierr = MatSetUp(Ae);CHKERRQ(ierr); 27604222ddf1SHong Zhang ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2761320f2790SHong Zhang 27624222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 27634222ddf1SHong Zhang ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 27644222ddf1SHong Zhang ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 27654222ddf1SHong Zhang ierr = MatSetUp(Be);CHKERRQ(ierr); 27664222ddf1SHong Zhang ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2767320f2790SHong Zhang 27684222ddf1SHong Zhang /* compute symbolic Ce = Ae*Be */ 27694222ddf1SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 27704222ddf1SHong Zhang ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 27714222ddf1SHong Zhang 27724222ddf1SHong Zhang /* setup C */ 27734222ddf1SHong Zhang ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 27744222ddf1SHong Zhang ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 27754222ddf1SHong Zhang ierr = MatSetUp(C);CHKERRQ(ierr); 2776320f2790SHong Zhang 2777320f2790SHong Zhang /* create data structure for reuse Cdense */ 2778320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 27794222ddf1SHong Zhang c = (Mat_MPIDense*)C->data; 2780320f2790SHong Zhang c->abdense = ab; 2781320f2790SHong Zhang 2782320f2790SHong Zhang ab->Ae = Ae; 2783320f2790SHong Zhang ab->Be = Be; 2784320f2790SHong Zhang ab->Ce = Ce; 27854222ddf1SHong Zhang ab->destroy = C->ops->destroy; 27864222ddf1SHong Zhang C->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 27874222ddf1SHong Zhang C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 27884222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AB; 2789320f2790SHong Zhang PetscFunctionReturn(0); 2790320f2790SHong Zhang } 27914222ddf1SHong Zhang #endif 27924222ddf1SHong Zhang /* ----------------------------------------------- */ 27934222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 27944222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2795320f2790SHong Zhang { 2796320f2790SHong Zhang PetscFunctionBegin; 27974222ddf1SHong Zhang C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 27984222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AB; 27994222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AB; 2800320f2790SHong Zhang PetscFunctionReturn(0); 2801320f2790SHong Zhang } 28025242a7b1SHong Zhang #endif 280386aefd0dSHong Zhang 28044222ddf1SHong Zhang static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C) 28054222ddf1SHong Zhang { 28064222ddf1SHong Zhang PetscErrorCode ierr; 28074222ddf1SHong Zhang Mat_Product *product = C->product; 28084222ddf1SHong Zhang 28094222ddf1SHong Zhang PetscFunctionBegin; 28104222ddf1SHong Zhang ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr); 28114222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_AtB; 28124222ddf1SHong Zhang C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense; 28134222ddf1SHong Zhang PetscFunctionReturn(0); 28144222ddf1SHong Zhang } 28154222ddf1SHong Zhang 28164222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 28174222ddf1SHong Zhang { 28184222ddf1SHong Zhang Mat_Product *product = C->product; 28194222ddf1SHong Zhang Mat A = product->A,B=product->B; 28204222ddf1SHong Zhang 28214222ddf1SHong Zhang PetscFunctionBegin; 28224222ddf1SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 28234222ddf1SHong Zhang SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 28244222ddf1SHong Zhang 28254222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense; 28264222ddf1SHong Zhang PetscFunctionReturn(0); 28274222ddf1SHong Zhang } 28284222ddf1SHong Zhang 28294222ddf1SHong Zhang static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 28304222ddf1SHong Zhang { 28314222ddf1SHong Zhang PetscErrorCode ierr; 28324222ddf1SHong Zhang Mat_Product *product = C->product; 28334222ddf1SHong Zhang const char *algTypes[2] = {"allgatherv","cyclic"}; 28344222ddf1SHong Zhang PetscInt alg,nalg = 2; 28354222ddf1SHong Zhang PetscBool flg = PETSC_FALSE; 28364222ddf1SHong Zhang 28374222ddf1SHong Zhang PetscFunctionBegin; 28384222ddf1SHong Zhang /* Set default algorithm */ 28394222ddf1SHong Zhang alg = 0; /* default is allgatherv */ 28404222ddf1SHong Zhang ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 28414222ddf1SHong Zhang if (flg) { 28424222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 28434222ddf1SHong Zhang } 28444222ddf1SHong Zhang 28454222ddf1SHong Zhang /* Get runtime option */ 28464222ddf1SHong Zhang if (product->api_user) { 28474222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 28484222ddf1SHong Zhang ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 28494222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 28504222ddf1SHong Zhang } else { 28514222ddf1SHong Zhang ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 28524222ddf1SHong Zhang ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 28534222ddf1SHong Zhang ierr = PetscOptionsEnd();CHKERRQ(ierr); 28544222ddf1SHong Zhang } 28554222ddf1SHong Zhang if (flg) { 28564222ddf1SHong Zhang ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 28574222ddf1SHong Zhang } 28584222ddf1SHong Zhang 28594222ddf1SHong Zhang C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 28604222ddf1SHong Zhang C->ops->productsymbolic = MatProductSymbolic_ABt; 28614222ddf1SHong Zhang C->ops->productnumeric = MatProductNumeric_ABt; 28624222ddf1SHong Zhang PetscFunctionReturn(0); 28634222ddf1SHong Zhang } 28644222ddf1SHong Zhang 28654222ddf1SHong Zhang PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 28664222ddf1SHong Zhang { 28674222ddf1SHong Zhang PetscErrorCode ierr; 28684222ddf1SHong Zhang Mat_Product *product = C->product; 28694222ddf1SHong Zhang 28704222ddf1SHong Zhang PetscFunctionBegin; 28714222ddf1SHong Zhang switch (product->type) { 28724222ddf1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 28734222ddf1SHong Zhang case MATPRODUCT_AB: 28744222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 28754222ddf1SHong Zhang break; 28764222ddf1SHong Zhang #endif 28774222ddf1SHong Zhang case MATPRODUCT_AtB: 28784222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 28794222ddf1SHong Zhang break; 28804222ddf1SHong Zhang case MATPRODUCT_ABt: 28814222ddf1SHong Zhang ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 28824222ddf1SHong Zhang break; 2883544a5e07SHong Zhang default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]); 28844222ddf1SHong Zhang } 28854222ddf1SHong Zhang PetscFunctionReturn(0); 28864222ddf1SHong Zhang } 2887