1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 604fea9ffSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 9baa3c1c6SHong Zhang #include <petscblaslapack.h> 108965ea79SLois Curfman McInnes 11ab92ecdeSBarry Smith /*@ 12ab92ecdeSBarry Smith 13ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 15ab92ecdeSBarry Smith 16ab92ecdeSBarry Smith Input Parameter: 17ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 18ab92ecdeSBarry Smith 19ab92ecdeSBarry Smith Output Parameter: 20ab92ecdeSBarry Smith . B - the inner matrix 21ab92ecdeSBarry Smith 228e6c10adSSatish Balay Level: intermediate 238e6c10adSSatish Balay 24ab92ecdeSBarry Smith @*/ 25ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26ab92ecdeSBarry Smith { 27ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28ab92ecdeSBarry Smith PetscErrorCode ierr; 29ace3abfcSBarry Smith PetscBool flg; 30ab92ecdeSBarry Smith 31ab92ecdeSBarry Smith PetscFunctionBegin; 32251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 332205254eSKarl Rupp if (flg) *B = mat->A; 342205254eSKarl Rupp else *B = A; 35ab92ecdeSBarry Smith PetscFunctionReturn(0); 36ab92ecdeSBarry Smith } 37ab92ecdeSBarry Smith 38ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 39ba8c8a56SBarry Smith { 40ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 41ba8c8a56SBarry Smith PetscErrorCode ierr; 42d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 43ba8c8a56SBarry Smith 44ba8c8a56SBarry Smith PetscFunctionBegin; 45e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 46ba8c8a56SBarry Smith lrow = row - rstart; 47ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 48ba8c8a56SBarry Smith PetscFunctionReturn(0); 49ba8c8a56SBarry Smith } 50ba8c8a56SBarry Smith 51ba8c8a56SBarry Smith PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 52ba8c8a56SBarry Smith { 53ba8c8a56SBarry Smith PetscErrorCode ierr; 54ba8c8a56SBarry Smith 55ba8c8a56SBarry Smith PetscFunctionBegin; 56ba8c8a56SBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 57ba8c8a56SBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 58ba8c8a56SBarry Smith PetscFunctionReturn(0); 59ba8c8a56SBarry Smith } 60ba8c8a56SBarry Smith 6111bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 620de54da6SSatish Balay { 630de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 646849ba73SBarry Smith PetscErrorCode ierr; 65d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 6687828ca2SBarry Smith PetscScalar *array; 670de54da6SSatish Balay MPI_Comm comm; 6811bd1e4dSLisandro Dalcin Mat B; 690de54da6SSatish Balay 700de54da6SSatish Balay PetscFunctionBegin; 71e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 720de54da6SSatish Balay 7311bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 7411bd1e4dSLisandro Dalcin if (!B) { 750de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 7611bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 7711bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 7811bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 798c778c55SBarry Smith ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 8011bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 818c778c55SBarry Smith ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 8211bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8311bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8411bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin *a = B; 8611bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 872205254eSKarl Rupp } else *a = B; 880de54da6SSatish Balay PetscFunctionReturn(0); 890de54da6SSatish Balay } 900de54da6SSatish Balay 9113f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 928965ea79SLois Curfman McInnes { 9339b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 94dfbe8321SBarry Smith PetscErrorCode ierr; 95d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 96ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 978965ea79SLois Curfman McInnes 983a40ed3dSBarry Smith PetscFunctionBegin; 998965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1005ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 101e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1028965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1038965ea79SLois Curfman McInnes row = idxm[i] - rstart; 10439b7565bSBarry Smith if (roworiented) { 10539b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1063a40ed3dSBarry Smith } else { 1078965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1085ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 109e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11039b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11139b7565bSBarry Smith } 1128965ea79SLois Curfman McInnes } 1132205254eSKarl Rupp } else if (!A->donotstash) { 1145080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 11539b7565bSBarry Smith if (roworiented) { 116b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117d36fbae8SSatish Balay } else { 118b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 11939b7565bSBarry Smith } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes } 1223a40ed3dSBarry Smith PetscFunctionReturn(0); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes 12513f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126b49de8d1SLois Curfman McInnes { 127b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128dfbe8321SBarry Smith PetscErrorCode ierr; 129d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130b49de8d1SLois Curfman McInnes 1313a40ed3dSBarry Smith PetscFunctionBegin; 132b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 133e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 136b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 137b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 138e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141b49de8d1SLois Curfman McInnes } 142e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1438965ea79SLois Curfman McInnes } 1443a40ed3dSBarry Smith PetscFunctionReturn(0); 1458965ea79SLois Curfman McInnes } 1468965ea79SLois Curfman McInnes 147d3042a70SBarry Smith static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148ff14e315SSatish Balay { 149ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150dfbe8321SBarry Smith PetscErrorCode ierr; 151ff14e315SSatish Balay 1523a40ed3dSBarry Smith PetscFunctionBegin; 1538c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1543a40ed3dSBarry Smith PetscFunctionReturn(0); 155ff14e315SSatish Balay } 156ff14e315SSatish Balay 1578572280aSBarry Smith static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 1588572280aSBarry Smith { 1598572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1608572280aSBarry Smith PetscErrorCode ierr; 1618572280aSBarry Smith 1628572280aSBarry Smith PetscFunctionBegin; 1638572280aSBarry Smith ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 1648572280aSBarry Smith PetscFunctionReturn(0); 1658572280aSBarry Smith } 1668572280aSBarry Smith 167d3042a70SBarry Smith static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[]) 168d3042a70SBarry Smith { 169d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 170d3042a70SBarry Smith PetscErrorCode ierr; 171d3042a70SBarry Smith 172d3042a70SBarry Smith PetscFunctionBegin; 173d3042a70SBarry Smith ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 174d3042a70SBarry Smith PetscFunctionReturn(0); 175d3042a70SBarry Smith } 176d3042a70SBarry Smith 177d3042a70SBarry Smith static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 178d3042a70SBarry Smith { 179d3042a70SBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 180d3042a70SBarry Smith PetscErrorCode ierr; 181d3042a70SBarry Smith 182d3042a70SBarry Smith PetscFunctionBegin; 183d3042a70SBarry Smith ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 184d3042a70SBarry Smith PetscFunctionReturn(0); 185d3042a70SBarry Smith } 186d3042a70SBarry Smith 1877dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 188ca3fa75bSLois Curfman McInnes { 189ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 190ca3fa75bSLois Curfman McInnes Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 1916849ba73SBarry Smith PetscErrorCode ierr; 1924aa3045dSJed Brown PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 1935d0c19d7SBarry Smith const PetscInt *irow,*icol; 19487828ca2SBarry Smith PetscScalar *av,*bv,*v = lmat->v; 195ca3fa75bSLois Curfman McInnes Mat newmat; 1964aa3045dSJed Brown IS iscol_local; 19742a884f0SBarry Smith MPI_Comm comm_is,comm_mat; 198ca3fa75bSLois Curfman McInnes 199ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 20042a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 20142a884f0SBarry Smith ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 20242a884f0SBarry Smith if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 20342a884f0SBarry Smith 2044aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 205ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 2064aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 207b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 208b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 2094aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 210ca3fa75bSLois Curfman McInnes 211ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 2127eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 2137eba5e9cSLois Curfman McInnes original matrix! */ 214ca3fa75bSLois Curfman McInnes 215ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 2167eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 217ca3fa75bSLois Curfman McInnes 218ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 219ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 220e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 2217eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 222ca3fa75bSLois Curfman McInnes newmat = *B; 223ca3fa75bSLois Curfman McInnes } else { 224ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 225ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 2264aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 2277adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 2280298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 229ca3fa75bSLois Curfman McInnes } 230ca3fa75bSLois Curfman McInnes 231ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 232ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 233ca3fa75bSLois Curfman McInnes bv = ((Mat_SeqDense*)newmatd->A->data)->v; 234ca3fa75bSLois Curfman McInnes 2354aa3045dSJed Brown for (i=0; i<Ncols; i++) { 23625a33276SHong Zhang av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 237ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2387eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 239ca3fa75bSLois Curfman McInnes } 240ca3fa75bSLois Curfman McInnes } 241ca3fa75bSLois Curfman McInnes 242ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 243ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 244ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 245ca3fa75bSLois Curfman McInnes 246ca3fa75bSLois Curfman McInnes /* Free work space */ 247ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 2485bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 24932bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 250ca3fa75bSLois Curfman McInnes *B = newmat; 251ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 252ca3fa75bSLois Curfman McInnes } 253ca3fa75bSLois Curfman McInnes 2548c778c55SBarry Smith PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 255ff14e315SSatish Balay { 25673a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 25773a71a0fSBarry Smith PetscErrorCode ierr; 25873a71a0fSBarry Smith 2593a40ed3dSBarry Smith PetscFunctionBegin; 2608c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 2613a40ed3dSBarry Smith PetscFunctionReturn(0); 262ff14e315SSatish Balay } 263ff14e315SSatish Balay 2648572280aSBarry Smith PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[]) 2658572280aSBarry Smith { 2668572280aSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2678572280aSBarry Smith PetscErrorCode ierr; 2688572280aSBarry Smith 2698572280aSBarry Smith PetscFunctionBegin; 2708572280aSBarry Smith ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 2718572280aSBarry Smith PetscFunctionReturn(0); 2728572280aSBarry Smith } 2738572280aSBarry Smith 274dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 2758965ea79SLois Curfman McInnes { 27639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 277ce94432eSBarry Smith MPI_Comm comm; 278dfbe8321SBarry Smith PetscErrorCode ierr; 27913f74950SBarry Smith PetscInt nstash,reallocs; 2808965ea79SLois Curfman McInnes InsertMode addv; 2818965ea79SLois Curfman McInnes 2823a40ed3dSBarry Smith PetscFunctionBegin; 283ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2848965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 285b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 286ce94432eSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 287e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 2888965ea79SLois Curfman McInnes 289d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 2908798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 291ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 2923a40ed3dSBarry Smith PetscFunctionReturn(0); 2938965ea79SLois Curfman McInnes } 2948965ea79SLois Curfman McInnes 295dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 2968965ea79SLois Curfman McInnes { 29739ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 2986849ba73SBarry Smith PetscErrorCode ierr; 29913f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 30013f74950SBarry Smith PetscMPIInt n; 30187828ca2SBarry Smith PetscScalar *val; 3028965ea79SLois Curfman McInnes 3033a40ed3dSBarry Smith PetscFunctionBegin; 3048965ea79SLois Curfman McInnes /* wait on receives */ 3057ef1d9bdSSatish Balay while (1) { 3068798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 3077ef1d9bdSSatish Balay if (!flg) break; 3088965ea79SLois Curfman McInnes 3097ef1d9bdSSatish Balay for (i=0; i<n;) { 3107ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 3112205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 3122205254eSKarl Rupp if (row[j] != rstart) break; 3132205254eSKarl Rupp } 3147ef1d9bdSSatish Balay if (j < n) ncols = j-i; 3157ef1d9bdSSatish Balay else ncols = n-i; 3167ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 3174b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 3187ef1d9bdSSatish Balay i = j; 3198965ea79SLois Curfman McInnes } 3207ef1d9bdSSatish Balay } 3218798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 3228965ea79SLois Curfman McInnes 32339ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 32439ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 3258965ea79SLois Curfman McInnes 3266d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 32739ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 3288965ea79SLois Curfman McInnes } 3293a40ed3dSBarry Smith PetscFunctionReturn(0); 3308965ea79SLois Curfman McInnes } 3318965ea79SLois Curfman McInnes 332dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 3338965ea79SLois Curfman McInnes { 334dfbe8321SBarry Smith PetscErrorCode ierr; 33539ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3363a40ed3dSBarry Smith 3373a40ed3dSBarry Smith PetscFunctionBegin; 3383a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 3393a40ed3dSBarry Smith PetscFunctionReturn(0); 3408965ea79SLois Curfman McInnes } 3418965ea79SLois Curfman McInnes 3428965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 3438965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 3448965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 3453501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 3468965ea79SLois Curfman McInnes routine. 3478965ea79SLois Curfman McInnes */ 3482b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 3498965ea79SLois Curfman McInnes { 35039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3516849ba73SBarry Smith PetscErrorCode ierr; 352d0f46423SBarry Smith PetscInt i,*owners = A->rmap->range; 35376ec1555SBarry Smith PetscInt *sizes,j,idx,nsends; 35413f74950SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 3557adad957SLisandro Dalcin PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 35613f74950SBarry Smith PetscInt *lens,*lrows,*values; 35713f74950SBarry Smith PetscMPIInt n,imdex,rank = l->rank,size = l->size; 358ce94432eSBarry Smith MPI_Comm comm; 3598965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 3608965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 361ace3abfcSBarry Smith PetscBool found; 36297b48c8fSBarry Smith const PetscScalar *xx; 36397b48c8fSBarry Smith PetscScalar *bb; 3648965ea79SLois Curfman McInnes 3653a40ed3dSBarry Smith PetscFunctionBegin; 366ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 367ce94432eSBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 368b9679d65SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 3698965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 370f628708eSJed Brown ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 371037dbc42SBarry Smith ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 3728965ea79SLois Curfman McInnes for (i=0; i<N; i++) { 3738965ea79SLois Curfman McInnes idx = rows[i]; 37435d8aa7fSBarry Smith found = PETSC_FALSE; 3758965ea79SLois Curfman McInnes for (j=0; j<size; j++) { 3768965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 37776ec1555SBarry Smith sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 3788965ea79SLois Curfman McInnes } 3798965ea79SLois Curfman McInnes } 380e32f2f54SBarry Smith if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 3818965ea79SLois Curfman McInnes } 3822205254eSKarl Rupp nsends = 0; 38376ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 3848965ea79SLois Curfman McInnes 3858965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 38676ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 3878965ea79SLois Curfman McInnes 3888965ea79SLois Curfman McInnes /* post receives: */ 389785e854fSJed Brown ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 390854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 3918965ea79SLois Curfman McInnes for (i=0; i<nrecvs; i++) { 39213f74950SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 3938965ea79SLois Curfman McInnes } 3948965ea79SLois Curfman McInnes 3958965ea79SLois Curfman McInnes /* do sends: 3968965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3978965ea79SLois Curfman McInnes the ith processor 3988965ea79SLois Curfman McInnes */ 399854ce69bSBarry Smith ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 400854ce69bSBarry Smith ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 401854ce69bSBarry Smith ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 4028965ea79SLois Curfman McInnes 4038965ea79SLois Curfman McInnes starts[0] = 0; 40476ec1555SBarry Smith for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 4052205254eSKarl Rupp for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 4062205254eSKarl Rupp 4072205254eSKarl Rupp starts[0] = 0; 40876ec1555SBarry Smith for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 4098965ea79SLois Curfman McInnes count = 0; 4108965ea79SLois Curfman McInnes for (i=0; i<size; i++) { 41176ec1555SBarry Smith if (sizes[2*i+1]) { 41276ec1555SBarry Smith ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 4138965ea79SLois Curfman McInnes } 4148965ea79SLois Curfman McInnes } 415606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 4168965ea79SLois Curfman McInnes 4178965ea79SLois Curfman McInnes base = owners[rank]; 4188965ea79SLois Curfman McInnes 4198965ea79SLois Curfman McInnes /* wait on receives */ 420dcca6d9dSJed Brown ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 42174ed9c26SBarry Smith count = nrecvs; 42274ed9c26SBarry Smith slen = 0; 4238965ea79SLois Curfman McInnes while (count) { 424ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 4258965ea79SLois Curfman McInnes /* unpack receives into our local space */ 42613f74950SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 4272205254eSKarl Rupp 4288965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 4298965ea79SLois Curfman McInnes lens[imdex] = n; 4308965ea79SLois Curfman McInnes slen += n; 4318965ea79SLois Curfman McInnes count--; 4328965ea79SLois Curfman McInnes } 433606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 4348965ea79SLois Curfman McInnes 4358965ea79SLois Curfman McInnes /* move the data into the send scatter */ 436854ce69bSBarry Smith ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 4378965ea79SLois Curfman McInnes count = 0; 4388965ea79SLois Curfman McInnes for (i=0; i<nrecvs; i++) { 4398965ea79SLois Curfman McInnes values = rvalues + i*nmax; 4408965ea79SLois Curfman McInnes for (j=0; j<lens[i]; j++) { 4418965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 4428965ea79SLois Curfman McInnes } 4438965ea79SLois Curfman McInnes } 444606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 44574ed9c26SBarry Smith ierr = PetscFree2(lens,source);CHKERRQ(ierr); 446606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 44776ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 4488965ea79SLois Curfman McInnes 44997b48c8fSBarry Smith /* fix right hand side if needed */ 45097b48c8fSBarry Smith if (x && b) { 45197b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 45297b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 45397b48c8fSBarry Smith for (i=0; i<slen; i++) { 45497b48c8fSBarry Smith bb[lrows[i]] = diag*xx[lrows[i]]; 45597b48c8fSBarry Smith } 45697b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 45797b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 45897b48c8fSBarry Smith } 45997b48c8fSBarry Smith 4608965ea79SLois Curfman McInnes /* actually zap the local rows */ 461b9679d65SBarry Smith ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 462e2eb51b1SBarry Smith if (diag != 0.0) { 463b9679d65SBarry Smith Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 464b9679d65SBarry Smith PetscInt m = ll->lda, i; 465b9679d65SBarry Smith 466b9679d65SBarry Smith for (i=0; i<slen; i++) { 467b9679d65SBarry Smith ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 468b9679d65SBarry Smith } 469b9679d65SBarry Smith } 470606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4718965ea79SLois Curfman McInnes 4728965ea79SLois Curfman McInnes /* wait on sends */ 4738965ea79SLois Curfman McInnes if (nsends) { 474785e854fSJed Brown ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 475ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 476606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 4778965ea79SLois Curfman McInnes } 478606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 479606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 4803a40ed3dSBarry Smith PetscFunctionReturn(0); 4818965ea79SLois Curfman McInnes } 4828965ea79SLois Curfman McInnes 483cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 484cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 485cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 486cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 487cc2e6a90SBarry Smith 488dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4898965ea79SLois Curfman McInnes { 49039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 491dfbe8321SBarry Smith PetscErrorCode ierr; 492c456f294SBarry Smith 4933a40ed3dSBarry Smith PetscFunctionBegin; 494ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 495ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496cc2e6a90SBarry Smith ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4973a40ed3dSBarry Smith PetscFunctionReturn(0); 4988965ea79SLois Curfman McInnes } 4998965ea79SLois Curfman McInnes 500dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 5018965ea79SLois Curfman McInnes { 50239ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 503dfbe8321SBarry Smith PetscErrorCode ierr; 504c456f294SBarry Smith 5053a40ed3dSBarry Smith PetscFunctionBegin; 506ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 507ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 508cc2e6a90SBarry Smith ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 5093a40ed3dSBarry Smith PetscFunctionReturn(0); 5108965ea79SLois Curfman McInnes } 5118965ea79SLois Curfman McInnes 512dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 513096963f5SLois Curfman McInnes { 514096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 515dfbe8321SBarry Smith PetscErrorCode ierr; 51687828ca2SBarry Smith PetscScalar zero = 0.0; 517096963f5SLois Curfman McInnes 5183a40ed3dSBarry Smith PetscFunctionBegin; 5192dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 520cc2e6a90SBarry Smith ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 521ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 522ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5233a40ed3dSBarry Smith PetscFunctionReturn(0); 524096963f5SLois Curfman McInnes } 525096963f5SLois Curfman McInnes 526dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 527096963f5SLois Curfman McInnes { 528096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 529dfbe8321SBarry Smith PetscErrorCode ierr; 530096963f5SLois Curfman McInnes 5313a40ed3dSBarry Smith PetscFunctionBegin; 5323501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 533cc2e6a90SBarry Smith ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 534ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 535ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5363a40ed3dSBarry Smith PetscFunctionReturn(0); 537096963f5SLois Curfman McInnes } 538096963f5SLois Curfman McInnes 539dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 5408965ea79SLois Curfman McInnes { 54139ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 543dfbe8321SBarry Smith PetscErrorCode ierr; 544d0f46423SBarry Smith PetscInt len,i,n,m = A->rmap->n,radd; 54587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 546ed3cc1f0SBarry Smith 5473a40ed3dSBarry Smith PetscFunctionBegin; 5482dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 5491ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 550096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 551e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 552d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 553d0f46423SBarry Smith radd = A->rmap->rstart*m; 55444cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 555096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 556096963f5SLois Curfman McInnes } 5571ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5583a40ed3dSBarry Smith PetscFunctionReturn(0); 5598965ea79SLois Curfman McInnes } 5608965ea79SLois Curfman McInnes 561dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5628965ea79SLois Curfman McInnes { 5633501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 564dfbe8321SBarry Smith PetscErrorCode ierr; 565ed3cc1f0SBarry Smith 5663a40ed3dSBarry Smith PetscFunctionBegin; 567aa482453SBarry Smith #if defined(PETSC_USE_LOG) 568d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5698965ea79SLois Curfman McInnes #endif 5708798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 5716bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5726bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 5736bf464f9SBarry Smith ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 57401b82886SBarry Smith 575bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 576dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5778baccfbdSHong Zhang 5788baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5798572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5808572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 5818572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 582d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 583d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 5848baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5858baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5868baccfbdSHong Zhang #endif 587bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 588bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 589bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 590bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5918baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5928baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5938baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 59486aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 59586aefd0dSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 5963a40ed3dSBarry Smith PetscFunctionReturn(0); 5978965ea79SLois Curfman McInnes } 59839ddd567SLois Curfman McInnes 5996849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 6008965ea79SLois Curfman McInnes { 60139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 602dfbe8321SBarry Smith PetscErrorCode ierr; 603aa05aa95SBarry Smith PetscViewerFormat format; 604aa05aa95SBarry Smith int fd; 605d0f46423SBarry Smith PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 606aa05aa95SBarry Smith PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 607578230a0SSatish Balay PetscScalar *work,*v,*vv; 608aa05aa95SBarry Smith Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 6097056b6fcSBarry Smith 6103a40ed3dSBarry Smith PetscFunctionBegin; 61139ddd567SLois Curfman McInnes if (mdn->size == 1) { 61239ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 613aa05aa95SBarry Smith } else { 614aa05aa95SBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 615ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 616ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 617aa05aa95SBarry Smith 618aa05aa95SBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 619f4403165SShri Abhyankar if (format == PETSC_VIEWER_NATIVE) { 620aa05aa95SBarry Smith 621aa05aa95SBarry Smith if (!rank) { 622aa05aa95SBarry Smith /* store the matrix as a dense matrix */ 6230700a824SBarry Smith header[0] = MAT_FILE_CLASSID; 624d0f46423SBarry Smith header[1] = mat->rmap->N; 625aa05aa95SBarry Smith header[2] = N; 626aa05aa95SBarry Smith header[3] = MATRIX_BINARY_FORMAT_DENSE; 627aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 628aa05aa95SBarry Smith 629aa05aa95SBarry Smith /* get largest work array needed for transposing array */ 630d0f46423SBarry Smith mmax = mat->rmap->n; 631aa05aa95SBarry Smith for (i=1; i<size; i++) { 632d0f46423SBarry Smith mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 6338965ea79SLois Curfman McInnes } 634785e854fSJed Brown ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 635aa05aa95SBarry Smith 636aa05aa95SBarry Smith /* write out local array, by rows */ 637d0f46423SBarry Smith m = mat->rmap->n; 638aa05aa95SBarry Smith v = a->v; 639aa05aa95SBarry Smith for (j=0; j<N; j++) { 640aa05aa95SBarry Smith for (i=0; i<m; i++) { 641578230a0SSatish Balay work[j + i*N] = *v++; 642aa05aa95SBarry Smith } 643aa05aa95SBarry Smith } 644aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 645aa05aa95SBarry Smith /* get largest work array to receive messages from other processes, excludes process zero */ 646aa05aa95SBarry Smith mmax = 0; 647aa05aa95SBarry Smith for (i=1; i<size; i++) { 648d0f46423SBarry Smith mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 649aa05aa95SBarry Smith } 650785e854fSJed Brown ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 651aa05aa95SBarry Smith for (k = 1; k < size; k++) { 652f8009846SMatthew Knepley v = vv; 653d0f46423SBarry Smith m = mat->rmap->range[k+1] - mat->rmap->range[k]; 654ce94432eSBarry Smith ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 655aa05aa95SBarry Smith 656aa05aa95SBarry Smith for (j = 0; j < N; j++) { 657aa05aa95SBarry Smith for (i = 0; i < m; i++) { 658578230a0SSatish Balay work[j + i*N] = *v++; 659aa05aa95SBarry Smith } 660aa05aa95SBarry Smith } 661aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 662aa05aa95SBarry Smith } 663aa05aa95SBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 664578230a0SSatish Balay ierr = PetscFree(vv);CHKERRQ(ierr); 665aa05aa95SBarry Smith } else { 666ce94432eSBarry Smith ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 667aa05aa95SBarry Smith } 6686a9046bcSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 669aa05aa95SBarry Smith } 6703a40ed3dSBarry Smith PetscFunctionReturn(0); 6718965ea79SLois Curfman McInnes } 6728965ea79SLois Curfman McInnes 6737da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 6749804daf3SBarry Smith #include <petscdraw.h> 6756849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 6768965ea79SLois Curfman McInnes { 67739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 678dfbe8321SBarry Smith PetscErrorCode ierr; 6797da1fb6eSBarry Smith PetscMPIInt rank = mdn->rank; 68019fd82e9SBarry Smith PetscViewerType vtype; 681ace3abfcSBarry Smith PetscBool iascii,isdraw; 682b0a32e0cSBarry Smith PetscViewer sviewer; 683f3ef73ceSBarry Smith PetscViewerFormat format; 6848965ea79SLois Curfman McInnes 6853a40ed3dSBarry Smith PetscFunctionBegin; 686251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 687251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 68832077d6dSBarry Smith if (iascii) { 689b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 690b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 691456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6924e220ebcSLois Curfman McInnes MatInfo info; 693888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 6941575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 6957b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 696b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6971575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 6985d00a290SHong Zhang ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 6993a40ed3dSBarry Smith PetscFunctionReturn(0); 700fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 7013a40ed3dSBarry Smith PetscFunctionReturn(0); 7028965ea79SLois Curfman McInnes } 703f1af5d2fSBarry Smith } else if (isdraw) { 704b0a32e0cSBarry Smith PetscDraw draw; 705ace3abfcSBarry Smith PetscBool isnull; 706f1af5d2fSBarry Smith 707b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 708b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 709f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 710f1af5d2fSBarry Smith } 71177ed5343SBarry Smith 7127da1fb6eSBarry Smith { 7138965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 7148965ea79SLois Curfman McInnes Mat A; 715d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 716ba8c8a56SBarry Smith PetscInt *cols; 717ba8c8a56SBarry Smith PetscScalar *vals; 7188965ea79SLois Curfman McInnes 719ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 7208965ea79SLois Curfman McInnes if (!rank) { 721f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 7223a40ed3dSBarry Smith } else { 723f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 7248965ea79SLois Curfman McInnes } 7257adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 726878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 7270298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 7283bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 7298965ea79SLois Curfman McInnes 73039ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 73139ddd567SLois Curfman McInnes but it's quick for now */ 73251022da4SBarry Smith A->insertmode = INSERT_VALUES; 7332205254eSKarl Rupp 7342205254eSKarl Rupp row = mat->rmap->rstart; 7352205254eSKarl Rupp m = mdn->A->rmap->n; 7368965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 737ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 738ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 739ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 74039ddd567SLois Curfman McInnes row++; 7418965ea79SLois Curfman McInnes } 7428965ea79SLois Curfman McInnes 7436d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7446d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7453f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 746b9b97703SBarry Smith if (!rank) { 7471a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 7487da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 7498965ea79SLois Curfman McInnes } 7503f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 751b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 7526bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 7538965ea79SLois Curfman McInnes } 7543a40ed3dSBarry Smith PetscFunctionReturn(0); 7558965ea79SLois Curfman McInnes } 7568965ea79SLois Curfman McInnes 757dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 7588965ea79SLois Curfman McInnes { 759dfbe8321SBarry Smith PetscErrorCode ierr; 760ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 7618965ea79SLois Curfman McInnes 762433994e6SBarry Smith PetscFunctionBegin; 763251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 764251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 765251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 766251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 7670f5bd95cSBarry Smith 76832077d6dSBarry Smith if (iascii || issocket || isdraw) { 769f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 7700f5bd95cSBarry Smith } else if (isbinary) { 7713a40ed3dSBarry Smith ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 77211aeaf0aSBarry Smith } 7733a40ed3dSBarry Smith PetscFunctionReturn(0); 7748965ea79SLois Curfman McInnes } 7758965ea79SLois Curfman McInnes 776dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 7778965ea79SLois Curfman McInnes { 7783501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 7793501a2bdSLois Curfman McInnes Mat mdn = mat->A; 780dfbe8321SBarry Smith PetscErrorCode ierr; 781329f5518SBarry Smith PetscReal isend[5],irecv[5]; 7828965ea79SLois Curfman McInnes 7833a40ed3dSBarry Smith PetscFunctionBegin; 7844e220ebcSLois Curfman McInnes info->block_size = 1.0; 7852205254eSKarl Rupp 7864e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 7872205254eSKarl Rupp 7884e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 7894e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 7908965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 7914e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7924e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7934e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7944e220ebcSLois Curfman McInnes info->memory = isend[3]; 7954e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7968965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 797b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7982205254eSKarl Rupp 7994e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8004e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8014e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8024e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8034e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8048965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 805b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8062205254eSKarl Rupp 8074e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 8084e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 8094e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 8104e220ebcSLois Curfman McInnes info->memory = irecv[3]; 8114e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 8128965ea79SLois Curfman McInnes } 8134e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 8144e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 8154e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 8163a40ed3dSBarry Smith PetscFunctionReturn(0); 8178965ea79SLois Curfman McInnes } 8188965ea79SLois Curfman McInnes 819ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 8208965ea79SLois Curfman McInnes { 82139ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 822dfbe8321SBarry Smith PetscErrorCode ierr; 8238965ea79SLois Curfman McInnes 8243a40ed3dSBarry Smith PetscFunctionBegin; 82512c028f9SKris Buschelman switch (op) { 826512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 82712c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 82812c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 82943674050SBarry Smith MatCheckPreallocated(A,1); 8304e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 83112c028f9SKris Buschelman break; 83212c028f9SKris Buschelman case MAT_ROW_ORIENTED: 83343674050SBarry Smith MatCheckPreallocated(A,1); 8344e0d8c25SBarry Smith a->roworiented = flg; 8354e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 83612c028f9SKris Buschelman break; 8374e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 83813fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 83912c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 840290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 84112c028f9SKris Buschelman break; 84212c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 8434e0d8c25SBarry Smith a->donotstash = flg; 84412c028f9SKris Buschelman break; 84577e54ba9SKris Buschelman case MAT_SYMMETRIC: 84677e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 8479a4540c5SBarry Smith case MAT_HERMITIAN: 8489a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 849600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 850*5d7aebe8SStefano Zampini case MAT_IGNORE_ZERO_ENTRIES: 851290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 85277e54ba9SKris Buschelman break; 85312c028f9SKris Buschelman default: 854e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 8553a40ed3dSBarry Smith } 8563a40ed3dSBarry Smith PetscFunctionReturn(0); 8578965ea79SLois Curfman McInnes } 8588965ea79SLois Curfman McInnes 8598965ea79SLois Curfman McInnes 860dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 8615b2fa520SLois Curfman McInnes { 8625b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 8635b2fa520SLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 864bca11509SBarry Smith const PetscScalar *l,*r; 865bca11509SBarry Smith PetscScalar x,*v; 866dfbe8321SBarry Smith PetscErrorCode ierr; 867d0f46423SBarry Smith PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 8685b2fa520SLois Curfman McInnes 8695b2fa520SLois Curfman McInnes PetscFunctionBegin; 87072d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 8715b2fa520SLois Curfman McInnes if (ll) { 87272d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 873e32f2f54SBarry Smith if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 874bca11509SBarry Smith ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 8755b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 8765b2fa520SLois Curfman McInnes x = l[i]; 8775b2fa520SLois Curfman McInnes v = mat->v + i; 8785b2fa520SLois Curfman McInnes for (j=0; j<n; j++) { (*v) *= x; v+= m;} 8795b2fa520SLois Curfman McInnes } 880bca11509SBarry Smith ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 881efee365bSSatish Balay ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 8825b2fa520SLois Curfman McInnes } 8835b2fa520SLois Curfman McInnes if (rr) { 884175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 885e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 886ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 888bca11509SBarry Smith ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 8895b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 8905b2fa520SLois Curfman McInnes x = r[i]; 8915b2fa520SLois Curfman McInnes v = mat->v + i*m; 8922205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 8935b2fa520SLois Curfman McInnes } 894bca11509SBarry Smith ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr); 895efee365bSSatish Balay ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 8965b2fa520SLois Curfman McInnes } 8975b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8985b2fa520SLois Curfman McInnes } 8995b2fa520SLois Curfman McInnes 900dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 901096963f5SLois Curfman McInnes { 9023501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 9033501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 904dfbe8321SBarry Smith PetscErrorCode ierr; 90513f74950SBarry Smith PetscInt i,j; 906329f5518SBarry Smith PetscReal sum = 0.0; 90787828ca2SBarry Smith PetscScalar *v = mat->v; 9083501a2bdSLois Curfman McInnes 9093a40ed3dSBarry Smith PetscFunctionBegin; 9103501a2bdSLois Curfman McInnes if (mdn->size == 1) { 911064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 9123501a2bdSLois Curfman McInnes } else { 9133501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 914d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 915329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 9163501a2bdSLois Curfman McInnes } 917b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 9188f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 919dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 9203a40ed3dSBarry Smith } else if (type == NORM_1) { 921329f5518SBarry Smith PetscReal *tmp,*tmp2; 922dcca6d9dSJed Brown ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 92374ed9c26SBarry Smith ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 92474ed9c26SBarry Smith ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 925064f8208SBarry Smith *nrm = 0.0; 9263501a2bdSLois Curfman McInnes v = mat->v; 927d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 928d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 92967e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 9303501a2bdSLois Curfman McInnes } 9313501a2bdSLois Curfman McInnes } 932b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 933d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 934064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 9353501a2bdSLois Curfman McInnes } 9368627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 937d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 9383a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 939329f5518SBarry Smith PetscReal ntemp; 9403501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 941b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 942ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 9433501a2bdSLois Curfman McInnes } 9443a40ed3dSBarry Smith PetscFunctionReturn(0); 9453501a2bdSLois Curfman McInnes } 9463501a2bdSLois Curfman McInnes 947fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 9483501a2bdSLois Curfman McInnes { 9493501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 9503501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 9513501a2bdSLois Curfman McInnes Mat B; 952d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 9536849ba73SBarry Smith PetscErrorCode ierr; 95413f74950SBarry Smith PetscInt j,i; 95587828ca2SBarry Smith PetscScalar *v; 9563501a2bdSLois Curfman McInnes 9573a40ed3dSBarry Smith PetscFunctionBegin; 958cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 959ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 960d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 9617adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 9620298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 963fc4dec0aSBarry Smith } else { 964fc4dec0aSBarry Smith B = *matout; 965fc4dec0aSBarry Smith } 9663501a2bdSLois Curfman McInnes 967d0f46423SBarry Smith m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 968785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 9693501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 9701acff37aSSatish Balay for (j=0; j<n; j++) { 9713501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 9723501a2bdSLois Curfman McInnes v += m; 9733501a2bdSLois Curfman McInnes } 974606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 9756d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9766d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 9783501a2bdSLois Curfman McInnes *matout = B; 9793501a2bdSLois Curfman McInnes } else { 98028be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 9813501a2bdSLois Curfman McInnes } 9823a40ed3dSBarry Smith PetscFunctionReturn(0); 983096963f5SLois Curfman McInnes } 984096963f5SLois Curfman McInnes 98544cd7ae7SLois Curfman McInnes 9866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 987d84338a6SBarry Smith extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 9888965ea79SLois Curfman McInnes 9894994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 990273d9f13SBarry Smith { 991dfbe8321SBarry Smith PetscErrorCode ierr; 992273d9f13SBarry Smith 993273d9f13SBarry Smith PetscFunctionBegin; 994273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 995273d9f13SBarry Smith PetscFunctionReturn(0); 996273d9f13SBarry Smith } 997273d9f13SBarry Smith 998488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 999488007eeSBarry Smith { 1000488007eeSBarry Smith PetscErrorCode ierr; 1001488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 1002488007eeSBarry Smith 1003488007eeSBarry Smith PetscFunctionBegin; 1004488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 1005a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1006488007eeSBarry Smith PetscFunctionReturn(0); 1007488007eeSBarry Smith } 1008488007eeSBarry Smith 10097087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 1010ba337c44SJed Brown { 1011ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1012ba337c44SJed Brown PetscErrorCode ierr; 1013ba337c44SJed Brown 1014ba337c44SJed Brown PetscFunctionBegin; 1015ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 1016ba337c44SJed Brown PetscFunctionReturn(0); 1017ba337c44SJed Brown } 1018ba337c44SJed Brown 1019ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 1020ba337c44SJed Brown { 1021ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1022ba337c44SJed Brown PetscErrorCode ierr; 1023ba337c44SJed Brown 1024ba337c44SJed Brown PetscFunctionBegin; 1025ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 1026ba337c44SJed Brown PetscFunctionReturn(0); 1027ba337c44SJed Brown } 1028ba337c44SJed Brown 1029ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 1030ba337c44SJed Brown { 1031ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1032ba337c44SJed Brown PetscErrorCode ierr; 1033ba337c44SJed Brown 1034ba337c44SJed Brown PetscFunctionBegin; 1035ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1036ba337c44SJed Brown PetscFunctionReturn(0); 1037ba337c44SJed Brown } 1038ba337c44SJed Brown 10390716a85fSBarry Smith extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 10400716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 10410716a85fSBarry Smith { 10420716a85fSBarry Smith PetscErrorCode ierr; 10430716a85fSBarry Smith PetscInt i,n; 10440716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 10450716a85fSBarry Smith PetscReal *work; 10460716a85fSBarry Smith 10470716a85fSBarry Smith PetscFunctionBegin; 10480298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 1049785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 10500716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 10510716a85fSBarry Smith if (type == NORM_2) { 10520716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 10530716a85fSBarry Smith } 10540716a85fSBarry Smith if (type == NORM_INFINITY) { 1055b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 10560716a85fSBarry Smith } else { 1057b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 10580716a85fSBarry Smith } 10590716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 10600716a85fSBarry Smith if (type == NORM_2) { 10618f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 10620716a85fSBarry Smith } 10630716a85fSBarry Smith PetscFunctionReturn(0); 10640716a85fSBarry Smith } 10650716a85fSBarry Smith 106673a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 106773a71a0fSBarry Smith { 106873a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 106973a71a0fSBarry Smith PetscErrorCode ierr; 107073a71a0fSBarry Smith PetscScalar *a; 107173a71a0fSBarry Smith PetscInt m,n,i; 107273a71a0fSBarry Smith 107373a71a0fSBarry Smith PetscFunctionBegin; 107473a71a0fSBarry Smith ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 10758c778c55SBarry Smith ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 107673a71a0fSBarry Smith for (i=0; i<m*n; i++) { 107773a71a0fSBarry Smith ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 107873a71a0fSBarry Smith } 10798c778c55SBarry Smith ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 108073a71a0fSBarry Smith PetscFunctionReturn(0); 108173a71a0fSBarry Smith } 108273a71a0fSBarry Smith 1083fd4e9aacSBarry Smith extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1084fd4e9aacSBarry Smith 10853b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 10863b49f96aSBarry Smith { 10873b49f96aSBarry Smith PetscFunctionBegin; 10883b49f96aSBarry Smith *missing = PETSC_FALSE; 10893b49f96aSBarry Smith PetscFunctionReturn(0); 10903b49f96aSBarry Smith } 10913b49f96aSBarry Smith 1092cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*); 1093cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*); 1094cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1095cc48ffa7SToby Isaac 10968965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 109709dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 109809dc0095SBarry Smith MatGetRow_MPIDense, 109909dc0095SBarry Smith MatRestoreRow_MPIDense, 110009dc0095SBarry Smith MatMult_MPIDense, 110197304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 11027c922b88SBarry Smith MatMultTranspose_MPIDense, 11037c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 11048965ea79SLois Curfman McInnes 0, 110509dc0095SBarry Smith 0, 110609dc0095SBarry Smith 0, 110797304618SKris Buschelman /* 10*/ 0, 110809dc0095SBarry Smith 0, 110909dc0095SBarry Smith 0, 111009dc0095SBarry Smith 0, 111109dc0095SBarry Smith MatTranspose_MPIDense, 111297304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 11136e4ee0c6SHong Zhang MatEqual_MPIDense, 111409dc0095SBarry Smith MatGetDiagonal_MPIDense, 11155b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 111609dc0095SBarry Smith MatNorm_MPIDense, 111797304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 111809dc0095SBarry Smith MatAssemblyEnd_MPIDense, 111909dc0095SBarry Smith MatSetOption_MPIDense, 112009dc0095SBarry Smith MatZeroEntries_MPIDense, 1121d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1122919b68f7SBarry Smith 0, 112301b82886SBarry Smith 0, 112401b82886SBarry Smith 0, 112501b82886SBarry Smith 0, 11264994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1127273d9f13SBarry Smith 0, 112809dc0095SBarry Smith 0, 1129c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 11308c778c55SBarry Smith 0, 1131d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 113209dc0095SBarry Smith 0, 113309dc0095SBarry Smith 0, 113409dc0095SBarry Smith 0, 113509dc0095SBarry Smith 0, 1136d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 11377dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 113809dc0095SBarry Smith 0, 113909dc0095SBarry Smith MatGetValues_MPIDense, 114009dc0095SBarry Smith 0, 1141d519adbfSMatthew Knepley /* 44*/ 0, 114209dc0095SBarry Smith MatScale_MPIDense, 11437d68702bSBarry Smith MatShift_Basic, 114409dc0095SBarry Smith 0, 114509dc0095SBarry Smith 0, 114673a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 114709dc0095SBarry Smith 0, 114809dc0095SBarry Smith 0, 114909dc0095SBarry Smith 0, 115009dc0095SBarry Smith 0, 1151d519adbfSMatthew Knepley /* 54*/ 0, 115209dc0095SBarry Smith 0, 115309dc0095SBarry Smith 0, 115409dc0095SBarry Smith 0, 115509dc0095SBarry Smith 0, 11567dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1157b9b97703SBarry Smith MatDestroy_MPIDense, 1158b9b97703SBarry Smith MatView_MPIDense, 1159357abbc8SBarry Smith 0, 116097304618SKris Buschelman 0, 1161d519adbfSMatthew Knepley /* 64*/ 0, 116297304618SKris Buschelman 0, 116397304618SKris Buschelman 0, 116497304618SKris Buschelman 0, 116597304618SKris Buschelman 0, 1166d519adbfSMatthew Knepley /* 69*/ 0, 116797304618SKris Buschelman 0, 116897304618SKris Buschelman 0, 116997304618SKris Buschelman 0, 117097304618SKris Buschelman 0, 1171d519adbfSMatthew Knepley /* 74*/ 0, 117297304618SKris Buschelman 0, 117397304618SKris Buschelman 0, 117497304618SKris Buschelman 0, 117597304618SKris Buschelman 0, 1176d519adbfSMatthew Knepley /* 79*/ 0, 117797304618SKris Buschelman 0, 117897304618SKris Buschelman 0, 117997304618SKris Buschelman 0, 11805bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1181865e5f61SKris Buschelman 0, 1182865e5f61SKris Buschelman 0, 1183865e5f61SKris Buschelman 0, 1184865e5f61SKris Buschelman 0, 1185865e5f61SKris Buschelman 0, 11869775878aSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1187320f2790SHong Zhang /* 89*/ MatMatMult_MPIDense_MPIDense, 1188320f2790SHong Zhang MatMatMultSymbolic_MPIDense_MPIDense, 11899775878aSHong Zhang #else 11909775878aSHong Zhang /* 89*/ 0, 1191865e5f61SKris Buschelman 0, 11929775878aSHong Zhang #endif 1193fd4e9aacSBarry Smith MatMatMultNumeric_MPIDense, 11942fbe02b9SBarry Smith 0, 1195ba337c44SJed Brown 0, 1196d519adbfSMatthew Knepley /* 94*/ 0, 1197cc48ffa7SToby Isaac MatMatTransposeMult_MPIDense_MPIDense, 1198cc48ffa7SToby Isaac MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1199cc48ffa7SToby Isaac MatMatTransposeMultNumeric_MPIDense_MPIDense, 1200ba337c44SJed Brown 0, 1201ba337c44SJed Brown /* 99*/ 0, 1202ba337c44SJed Brown 0, 1203ba337c44SJed Brown 0, 1204ba337c44SJed Brown MatConjugate_MPIDense, 1205ba337c44SJed Brown 0, 1206ba337c44SJed Brown /*104*/ 0, 1207ba337c44SJed Brown MatRealPart_MPIDense, 1208ba337c44SJed Brown MatImaginaryPart_MPIDense, 120986d161a7SShri Abhyankar 0, 121086d161a7SShri Abhyankar 0, 121186d161a7SShri Abhyankar /*109*/ 0, 121286d161a7SShri Abhyankar 0, 121386d161a7SShri Abhyankar 0, 121486d161a7SShri Abhyankar 0, 12153b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 121686d161a7SShri Abhyankar /*114*/ 0, 121786d161a7SShri Abhyankar 0, 121886d161a7SShri Abhyankar 0, 121986d161a7SShri Abhyankar 0, 122086d161a7SShri Abhyankar 0, 122186d161a7SShri Abhyankar /*119*/ 0, 122286d161a7SShri Abhyankar 0, 122386d161a7SShri Abhyankar 0, 12240716a85fSBarry Smith 0, 12250716a85fSBarry Smith 0, 12260716a85fSBarry Smith /*124*/ 0, 12273964eb88SJed Brown MatGetColumnNorms_MPIDense, 12283964eb88SJed Brown 0, 12293964eb88SJed Brown 0, 12303964eb88SJed Brown 0, 12313964eb88SJed Brown /*129*/ 0, 1232cb20be35SHong Zhang MatTransposeMatMult_MPIDense_MPIDense, 1233cb20be35SHong Zhang MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1234cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 12353964eb88SJed Brown 0, 12363964eb88SJed Brown /*134*/ 0, 12373964eb88SJed Brown 0, 12383964eb88SJed Brown 0, 12393964eb88SJed Brown 0, 12403964eb88SJed Brown 0, 12413964eb88SJed Brown /*139*/ 0, 1242f9426fe0SMark Adams 0, 124394e2cb23SJakub Kruzik 0, 124494e2cb23SJakub Kruzik 0, 124594e2cb23SJakub Kruzik 0, 12461ae5eee8SJakub Kruzik /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense 1247ba337c44SJed Brown }; 12488965ea79SLois Curfman McInnes 12497087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1250a23d5eceSKris Buschelman { 1251a23d5eceSKris Buschelman Mat_MPIDense *a; 1252dfbe8321SBarry Smith PetscErrorCode ierr; 1253a23d5eceSKris Buschelman 1254a23d5eceSKris Buschelman PetscFunctionBegin; 1255430ada49SBarry Smith if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated"); 1256a23d5eceSKris Buschelman mat->preallocated = PETSC_TRUE; 1257a23d5eceSKris Buschelman /* Note: For now, when data is specified above, this assumes the user correctly 1258a23d5eceSKris Buschelman allocates the local dense storage space. We should add error checking. */ 1259a23d5eceSKris Buschelman 1260a23d5eceSKris Buschelman a = (Mat_MPIDense*)mat->data; 126134ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 126234ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 126334ef9618SShri Abhyankar a->nvec = mat->cmap->n; 126434ef9618SShri Abhyankar 1265f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1266d0f46423SBarry Smith ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 12675c5985e7SKris Buschelman ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 12685c5985e7SKris Buschelman ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 12693bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1270a23d5eceSKris Buschelman PetscFunctionReturn(0); 1271a23d5eceSKris Buschelman } 1272a23d5eceSKris Buschelman 127365b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1274cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 12758baccfbdSHong Zhang { 12768ea901baSHong Zhang Mat mat_elemental; 12778ea901baSHong Zhang PetscErrorCode ierr; 127832d7a744SHong Zhang PetscScalar *v; 127932d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 12808ea901baSHong Zhang 12818baccfbdSHong Zhang PetscFunctionBegin; 1282378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1283378336b6SHong Zhang mat_elemental = *newmat; 1284378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1285378336b6SHong Zhang } else { 1286378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1287378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1288378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1289378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 129032d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1291378336b6SHong Zhang } 1292378336b6SHong Zhang 129332d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 129432d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 129532d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 12968ea901baSHong Zhang 12978ea901baSHong Zhang /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 129832d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 129932d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 13008ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 13018ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 130232d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 130332d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 13048ea901baSHong Zhang 1305511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 130628be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 13078ea901baSHong Zhang } else { 13088ea901baSHong Zhang *newmat = mat_elemental; 13098ea901baSHong Zhang } 13108baccfbdSHong Zhang PetscFunctionReturn(0); 13118baccfbdSHong Zhang } 131265b80a83SHong Zhang #endif 13138baccfbdSHong Zhang 1314af53bab2SHong Zhang static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 131586aefd0dSHong Zhang { 131686aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 131786aefd0dSHong Zhang PetscErrorCode ierr; 131886aefd0dSHong Zhang 131986aefd0dSHong Zhang PetscFunctionBegin; 132086aefd0dSHong Zhang ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr); 132186aefd0dSHong Zhang PetscFunctionReturn(0); 132286aefd0dSHong Zhang } 132386aefd0dSHong Zhang 1324af53bab2SHong Zhang static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 132586aefd0dSHong Zhang { 132686aefd0dSHong Zhang Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 132786aefd0dSHong Zhang PetscErrorCode ierr; 132886aefd0dSHong Zhang 132986aefd0dSHong Zhang PetscFunctionBegin; 133086aefd0dSHong Zhang ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr); 133186aefd0dSHong Zhang PetscFunctionReturn(0); 133286aefd0dSHong Zhang } 133386aefd0dSHong Zhang 133494e2cb23SJakub Kruzik PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 133594e2cb23SJakub Kruzik { 133694e2cb23SJakub Kruzik PetscErrorCode ierr; 133794e2cb23SJakub Kruzik Mat_MPIDense *mat; 133894e2cb23SJakub Kruzik PetscInt m,nloc,N; 133994e2cb23SJakub Kruzik 134094e2cb23SJakub Kruzik PetscFunctionBegin; 134194e2cb23SJakub Kruzik ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 134294e2cb23SJakub Kruzik ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr); 134394e2cb23SJakub Kruzik if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 134494e2cb23SJakub Kruzik PetscInt sum; 134594e2cb23SJakub Kruzik 134694e2cb23SJakub Kruzik if (n == PETSC_DECIDE) { 134794e2cb23SJakub Kruzik ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 134894e2cb23SJakub Kruzik } 134994e2cb23SJakub Kruzik /* Check sum(n) = N */ 135094e2cb23SJakub Kruzik ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 135194e2cb23SJakub Kruzik if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 135294e2cb23SJakub Kruzik 135394e2cb23SJakub Kruzik ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr); 135494e2cb23SJakub Kruzik } 135594e2cb23SJakub Kruzik 135694e2cb23SJakub Kruzik /* numeric phase */ 135794e2cb23SJakub Kruzik mat = (Mat_MPIDense*)(*outmat)->data; 135894e2cb23SJakub Kruzik ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 135994e2cb23SJakub Kruzik ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 136094e2cb23SJakub Kruzik ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 136194e2cb23SJakub Kruzik PetscFunctionReturn(0); 136294e2cb23SJakub Kruzik } 136394e2cb23SJakub Kruzik 13648cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1365273d9f13SBarry Smith { 1366273d9f13SBarry Smith Mat_MPIDense *a; 1367dfbe8321SBarry Smith PetscErrorCode ierr; 1368273d9f13SBarry Smith 1369273d9f13SBarry Smith PetscFunctionBegin; 1370b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1371b0a32e0cSBarry Smith mat->data = (void*)a; 1372273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1373273d9f13SBarry Smith 1374273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1375ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1376ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1377273d9f13SBarry Smith 1378273d9f13SBarry Smith /* build cache for off array entries formed */ 1379273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 13802205254eSKarl Rupp 1381ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1382273d9f13SBarry Smith 1383273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1384273d9f13SBarry Smith a->lvec = 0; 1385273d9f13SBarry Smith a->Mvctx = 0; 1386273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1387273d9f13SBarry Smith 1388bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 13898572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 13908572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr); 13918572280aSBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr); 1392d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr); 1393d3042a70SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr); 13948baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 13958baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 13968baccfbdSHong Zhang #endif 1397bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1398bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1399bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1400bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 14018949adfdSHong Zhang 14028949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 14038949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 14048949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 1405af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr); 1406af53bab2SHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr); 140738aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1408273d9f13SBarry Smith PetscFunctionReturn(0); 1409273d9f13SBarry Smith } 1410273d9f13SBarry Smith 1411209238afSKris Buschelman /*MC 1412002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1413209238afSKris Buschelman 1414209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1415209238afSKris Buschelman and MATMPIDENSE otherwise. 1416209238afSKris Buschelman 1417209238afSKris Buschelman Options Database Keys: 1418209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1419209238afSKris Buschelman 1420209238afSKris Buschelman Level: beginner 1421209238afSKris Buschelman 142201b82886SBarry Smith 1423209238afSKris Buschelman .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1424209238afSKris Buschelman M*/ 1425209238afSKris Buschelman 1426273d9f13SBarry Smith /*@C 1427273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1428273d9f13SBarry Smith 1429273d9f13SBarry Smith Not collective 1430273d9f13SBarry Smith 1431273d9f13SBarry Smith Input Parameters: 14321c4f3114SJed Brown . B - the matrix 14330298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1434273d9f13SBarry Smith to control all matrix memory allocation. 1435273d9f13SBarry Smith 1436273d9f13SBarry Smith Notes: 1437273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1438273d9f13SBarry Smith storage by columns. 1439273d9f13SBarry Smith 1440273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1441273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 14420298fd71SBarry Smith set data=NULL. 1443273d9f13SBarry Smith 1444273d9f13SBarry Smith Level: intermediate 1445273d9f13SBarry Smith 1446273d9f13SBarry Smith .keywords: matrix,dense, parallel 1447273d9f13SBarry Smith 1448273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1449273d9f13SBarry Smith @*/ 14501c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1451273d9f13SBarry Smith { 14524ac538c5SBarry Smith PetscErrorCode ierr; 1453273d9f13SBarry Smith 1454273d9f13SBarry Smith PetscFunctionBegin; 14551c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1456273d9f13SBarry Smith PetscFunctionReturn(0); 1457273d9f13SBarry Smith } 1458273d9f13SBarry Smith 1459d3042a70SBarry Smith /*@ 1460d3042a70SBarry Smith MatDensePlaceArray - Allows one to replace the array in a dense array with an 1461d3042a70SBarry Smith array provided by the user. This is useful to avoid copying an array 1462d3042a70SBarry Smith into a matrix 1463d3042a70SBarry Smith 1464d3042a70SBarry Smith Not Collective 1465d3042a70SBarry Smith 1466d3042a70SBarry Smith Input Parameters: 1467d3042a70SBarry Smith + mat - the matrix 1468d3042a70SBarry Smith - array - the array in column major order 1469d3042a70SBarry Smith 1470d3042a70SBarry Smith Notes: 1471d3042a70SBarry Smith You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 1472d3042a70SBarry Smith freed when the matrix is destroyed. 1473d3042a70SBarry Smith 1474d3042a70SBarry Smith Level: developer 1475d3042a70SBarry Smith 1476d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1477d3042a70SBarry Smith 1478d3042a70SBarry Smith @*/ 1479d3042a70SBarry Smith PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar array[]) 1480d3042a70SBarry Smith { 1481d3042a70SBarry Smith PetscErrorCode ierr; 1482d3042a70SBarry Smith PetscFunctionBegin; 1483d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 1484d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1485d3042a70SBarry Smith PetscFunctionReturn(0); 1486d3042a70SBarry Smith } 1487d3042a70SBarry Smith 1488d3042a70SBarry Smith /*@ 1489d3042a70SBarry Smith MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 1490d3042a70SBarry Smith 1491d3042a70SBarry Smith Not Collective 1492d3042a70SBarry Smith 1493d3042a70SBarry Smith Input Parameters: 1494d3042a70SBarry Smith . mat - the matrix 1495d3042a70SBarry Smith 1496d3042a70SBarry Smith Notes: 1497d3042a70SBarry Smith You can only call this after a call to MatDensePlaceArray() 1498d3042a70SBarry Smith 1499d3042a70SBarry Smith Level: developer 1500d3042a70SBarry Smith 1501d3042a70SBarry Smith .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 1502d3042a70SBarry Smith 1503d3042a70SBarry Smith @*/ 1504d3042a70SBarry Smith PetscErrorCode MatDenseResetArray(Mat mat) 1505d3042a70SBarry Smith { 1506d3042a70SBarry Smith PetscErrorCode ierr; 1507d3042a70SBarry Smith PetscFunctionBegin; 1508d3042a70SBarry Smith ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr); 1509d3042a70SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 1510d3042a70SBarry Smith PetscFunctionReturn(0); 1511d3042a70SBarry Smith } 1512d3042a70SBarry Smith 15138965ea79SLois Curfman McInnes /*@C 151469b1f4b7SBarry Smith MatCreateDense - Creates a parallel matrix in dense format. 15158965ea79SLois Curfman McInnes 1516db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1517db81eaa0SLois Curfman McInnes 15188965ea79SLois Curfman McInnes Input Parameters: 1519db81eaa0SLois Curfman McInnes + comm - MPI communicator 15208965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1521db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 15228965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1523db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 15246cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1525dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 15268965ea79SLois Curfman McInnes 15278965ea79SLois Curfman McInnes Output Parameter: 1528477f1c0bSLois Curfman McInnes . A - the matrix 15298965ea79SLois Curfman McInnes 1530b259b22eSLois Curfman McInnes Notes: 153139ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 153239ddd567SLois Curfman McInnes storage by columns. 15338965ea79SLois Curfman McInnes 153418f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 153518f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 15366cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 153718f449edSLois Curfman McInnes 15388965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 15398965ea79SLois Curfman McInnes (possibly both). 15408965ea79SLois Curfman McInnes 1541027ccd11SLois Curfman McInnes Level: intermediate 1542027ccd11SLois Curfman McInnes 154339ddd567SLois Curfman McInnes .keywords: matrix,dense, parallel 15448965ea79SLois Curfman McInnes 154539ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 15468965ea79SLois Curfman McInnes @*/ 154769b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 15488965ea79SLois Curfman McInnes { 15496849ba73SBarry Smith PetscErrorCode ierr; 155013f74950SBarry Smith PetscMPIInt size; 15518965ea79SLois Curfman McInnes 15523a40ed3dSBarry Smith PetscFunctionBegin; 1553f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 1554f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1555273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1556273d9f13SBarry Smith if (size > 1) { 1557273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1558273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 15596cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 15606cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 15616cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 15626cfe35ddSJose E. Roman } 1563273d9f13SBarry Smith } else { 1564273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1565273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 15668c469469SLois Curfman McInnes } 15673a40ed3dSBarry Smith PetscFunctionReturn(0); 15688965ea79SLois Curfman McInnes } 15698965ea79SLois Curfman McInnes 15706849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 15718965ea79SLois Curfman McInnes { 15728965ea79SLois Curfman McInnes Mat mat; 15733501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1574dfbe8321SBarry Smith PetscErrorCode ierr; 15758965ea79SLois Curfman McInnes 15763a40ed3dSBarry Smith PetscFunctionBegin; 15778965ea79SLois Curfman McInnes *newmat = 0; 1578ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1579d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 15807adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1581834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 15825aa7edbeSHong Zhang 1583d5f3da31SBarry Smith mat->factortype = A->factortype; 1584c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1585273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 15868965ea79SLois Curfman McInnes 15878965ea79SLois Curfman McInnes a->size = oldmat->size; 15888965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1589e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 1590b9b97703SBarry Smith a->nvec = oldmat->nvec; 15913782ba37SSatish Balay a->donotstash = oldmat->donotstash; 1592e04c1aa4SHong Zhang 15931e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 15941e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 15958965ea79SLois Curfman McInnes 1596329f5518SBarry Smith ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 15975609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 15983bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 159901b82886SBarry Smith 16008965ea79SLois Curfman McInnes *newmat = mat; 16013a40ed3dSBarry Smith PetscFunctionReturn(0); 16028965ea79SLois Curfman McInnes } 16038965ea79SLois Curfman McInnes 16049d36ed5fSBarry Smith PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 160586d161a7SShri Abhyankar { 160686d161a7SShri Abhyankar PetscErrorCode ierr; 160786d161a7SShri Abhyankar PetscMPIInt rank,size; 160874c13d95SBarry Smith const PetscInt *rowners; 160974c13d95SBarry Smith PetscInt i,m,n,nz,j,mMax; 161086d161a7SShri Abhyankar PetscScalar *array,*vals,*vals_ptr; 16119d36ed5fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 161286d161a7SShri Abhyankar 161386d161a7SShri Abhyankar PetscFunctionBegin; 161486d161a7SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 161586d161a7SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 161686d161a7SShri Abhyankar 161774c13d95SBarry Smith /* determine ownership of rows and columns */ 161874c13d95SBarry Smith m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 161974c13d95SBarry Smith n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 162086d161a7SShri Abhyankar 162174c13d95SBarry Smith ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1622430ada49SBarry Smith if (!a->A) { 16230298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 16249d36ed5fSBarry Smith } 16258c778c55SBarry Smith ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 162649467e7dSBarry Smith ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 162774c13d95SBarry Smith ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1628396e826eSBarry Smith ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 162986d161a7SShri Abhyankar if (!rank) { 16309d36ed5fSBarry Smith ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 163186d161a7SShri Abhyankar 163286d161a7SShri Abhyankar /* read in my part of the matrix numerical values */ 163386d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 163486d161a7SShri Abhyankar 163586d161a7SShri Abhyankar /* insert into matrix-by row (this is why cannot directly read into array */ 163686d161a7SShri Abhyankar vals_ptr = vals; 163786d161a7SShri Abhyankar for (i=0; i<m; i++) { 163886d161a7SShri Abhyankar for (j=0; j<N; j++) { 163986d161a7SShri Abhyankar array[i + j*m] = *vals_ptr++; 164086d161a7SShri Abhyankar } 164186d161a7SShri Abhyankar } 164286d161a7SShri Abhyankar 164386d161a7SShri Abhyankar /* read in other processors and ship out */ 164486d161a7SShri Abhyankar for (i=1; i<size; i++) { 164586d161a7SShri Abhyankar nz = (rowners[i+1] - rowners[i])*N; 164686d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1647a25532f0SBarry Smith ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 164886d161a7SShri Abhyankar } 164986d161a7SShri Abhyankar } else { 165086d161a7SShri Abhyankar /* receive numeric values */ 1651785e854fSJed Brown ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 165286d161a7SShri Abhyankar 165386d161a7SShri Abhyankar /* receive message of values*/ 1654a25532f0SBarry Smith ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 165586d161a7SShri Abhyankar 165686d161a7SShri Abhyankar /* insert into matrix-by row (this is why cannot directly read into array */ 165786d161a7SShri Abhyankar vals_ptr = vals; 165886d161a7SShri Abhyankar for (i=0; i<m; i++) { 165986d161a7SShri Abhyankar for (j=0; j<N; j++) { 166086d161a7SShri Abhyankar array[i + j*m] = *vals_ptr++; 166186d161a7SShri Abhyankar } 166286d161a7SShri Abhyankar } 166386d161a7SShri Abhyankar } 16648c778c55SBarry Smith ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 166586d161a7SShri Abhyankar ierr = PetscFree(vals);CHKERRQ(ierr); 166686d161a7SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166786d161a7SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166886d161a7SShri Abhyankar PetscFunctionReturn(0); 166986d161a7SShri Abhyankar } 167086d161a7SShri Abhyankar 1671112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 167286d161a7SShri Abhyankar { 1673dfdea288SBarry Smith Mat_MPIDense *a; 167486d161a7SShri Abhyankar PetscScalar *vals,*svals; 1675ce94432eSBarry Smith MPI_Comm comm; 167686d161a7SShri Abhyankar MPI_Status status; 167749467e7dSBarry Smith PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 167886d161a7SShri Abhyankar PetscInt header[4],*rowlengths = 0,M,N,*cols; 167949467e7dSBarry Smith PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 16809d36ed5fSBarry Smith PetscInt i,nz,j,rstart,rend; 168186d161a7SShri Abhyankar int fd; 16827f489da9SVaclav Hapla PetscBool isbinary; 168386d161a7SShri Abhyankar PetscErrorCode ierr; 168486d161a7SShri Abhyankar 168586d161a7SShri Abhyankar PetscFunctionBegin; 16867f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 16877f489da9SVaclav Hapla if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 16887f489da9SVaclav Hapla 1689c98fd787SBarry Smith /* force binary viewer to load .info file if it has not yet done so */ 1690c98fd787SBarry Smith ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1691ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 169286d161a7SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 169386d161a7SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 169486d161a7SShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 16955872f025SBarry Smith if (!rank) { 169686d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 169786d161a7SShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 169886d161a7SShri Abhyankar } 169986d161a7SShri Abhyankar ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 170086d161a7SShri Abhyankar M = header[1]; N = header[2]; nz = header[3]; 170186d161a7SShri Abhyankar 170286d161a7SShri Abhyankar /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 17039d36ed5fSBarry Smith if (newmat->rmap->N < 0) newmat->rmap->N = M; 17049d36ed5fSBarry Smith if (newmat->cmap->N < 0) newmat->cmap->N = N; 170586d161a7SShri Abhyankar 17069d36ed5fSBarry Smith if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 17079d36ed5fSBarry Smith if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 170886d161a7SShri Abhyankar 170986d161a7SShri Abhyankar /* 171086d161a7SShri Abhyankar Handle case where matrix is stored on disk as a dense matrix 171186d161a7SShri Abhyankar */ 171286d161a7SShri Abhyankar if (nz == MATRIX_BINARY_FORMAT_DENSE) { 17139d36ed5fSBarry Smith ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 171486d161a7SShri Abhyankar PetscFunctionReturn(0); 171586d161a7SShri Abhyankar } 171686d161a7SShri Abhyankar 171786d161a7SShri Abhyankar /* determine ownership of all rows */ 17182205254eSKarl Rupp if (newmat->rmap->n < 0) { 17192205254eSKarl Rupp ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 17202205254eSKarl Rupp } else { 17212205254eSKarl Rupp ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 17222205254eSKarl Rupp } 172349467e7dSBarry Smith if (newmat->cmap->n < 0) { 172449467e7dSBarry Smith n = PETSC_DECIDE; 172549467e7dSBarry Smith } else { 172649467e7dSBarry Smith ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 172749467e7dSBarry Smith } 172849467e7dSBarry Smith 1729854ce69bSBarry Smith ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 173086d161a7SShri Abhyankar ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 173186d161a7SShri Abhyankar rowners[0] = 0; 173286d161a7SShri Abhyankar for (i=2; i<=size; i++) { 173386d161a7SShri Abhyankar rowners[i] += rowners[i-1]; 173486d161a7SShri Abhyankar } 173586d161a7SShri Abhyankar rstart = rowners[rank]; 173686d161a7SShri Abhyankar rend = rowners[rank+1]; 173786d161a7SShri Abhyankar 173886d161a7SShri Abhyankar /* distribute row lengths to all processors */ 173949467e7dSBarry Smith ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 174086d161a7SShri Abhyankar if (!rank) { 1741785e854fSJed Brown ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 174286d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1743785e854fSJed Brown ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 174486d161a7SShri Abhyankar for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 174586d161a7SShri Abhyankar ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 174686d161a7SShri Abhyankar ierr = PetscFree(sndcounts);CHKERRQ(ierr); 174786d161a7SShri Abhyankar } else { 174886d161a7SShri Abhyankar ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 174986d161a7SShri Abhyankar } 175086d161a7SShri Abhyankar 175186d161a7SShri Abhyankar if (!rank) { 175286d161a7SShri Abhyankar /* calculate the number of nonzeros on each processor */ 1753785e854fSJed Brown ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 175486d161a7SShri Abhyankar ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 175586d161a7SShri Abhyankar for (i=0; i<size; i++) { 175686d161a7SShri Abhyankar for (j=rowners[i]; j< rowners[i+1]; j++) { 175786d161a7SShri Abhyankar procsnz[i] += rowlengths[j]; 175886d161a7SShri Abhyankar } 175986d161a7SShri Abhyankar } 176086d161a7SShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 176186d161a7SShri Abhyankar 176286d161a7SShri Abhyankar /* determine max buffer needed and allocate it */ 176386d161a7SShri Abhyankar maxnz = 0; 176486d161a7SShri Abhyankar for (i=0; i<size; i++) { 176586d161a7SShri Abhyankar maxnz = PetscMax(maxnz,procsnz[i]); 176686d161a7SShri Abhyankar } 1767785e854fSJed Brown ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 176886d161a7SShri Abhyankar 176986d161a7SShri Abhyankar /* read in my part of the matrix column indices */ 177086d161a7SShri Abhyankar nz = procsnz[0]; 1771785e854fSJed Brown ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 177286d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 177386d161a7SShri Abhyankar 177486d161a7SShri Abhyankar /* read in every one elses and ship off */ 177586d161a7SShri Abhyankar for (i=1; i<size; i++) { 177686d161a7SShri Abhyankar nz = procsnz[i]; 177786d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 177886d161a7SShri Abhyankar ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 177986d161a7SShri Abhyankar } 178086d161a7SShri Abhyankar ierr = PetscFree(cols);CHKERRQ(ierr); 178186d161a7SShri Abhyankar } else { 178286d161a7SShri Abhyankar /* determine buffer space needed for message */ 178386d161a7SShri Abhyankar nz = 0; 178486d161a7SShri Abhyankar for (i=0; i<m; i++) { 178586d161a7SShri Abhyankar nz += ourlens[i]; 178686d161a7SShri Abhyankar } 1787854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 178886d161a7SShri Abhyankar 178986d161a7SShri Abhyankar /* receive message of column indices*/ 179086d161a7SShri Abhyankar ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 179186d161a7SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 179286d161a7SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 179386d161a7SShri Abhyankar } 179486d161a7SShri Abhyankar 179549467e7dSBarry Smith ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1796dfdea288SBarry Smith a = (Mat_MPIDense*)newmat->data; 1797430ada49SBarry Smith if (!a->A) { 17980298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1799dfdea288SBarry Smith } 180086d161a7SShri Abhyankar 180186d161a7SShri Abhyankar if (!rank) { 1802785e854fSJed Brown ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 180386d161a7SShri Abhyankar 180486d161a7SShri Abhyankar /* read in my part of the matrix numerical values */ 180586d161a7SShri Abhyankar nz = procsnz[0]; 180686d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 180786d161a7SShri Abhyankar 180886d161a7SShri Abhyankar /* insert into matrix */ 180986d161a7SShri Abhyankar jj = rstart; 181086d161a7SShri Abhyankar smycols = mycols; 181186d161a7SShri Abhyankar svals = vals; 181286d161a7SShri Abhyankar for (i=0; i<m; i++) { 181386d161a7SShri Abhyankar ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 181486d161a7SShri Abhyankar smycols += ourlens[i]; 181586d161a7SShri Abhyankar svals += ourlens[i]; 181686d161a7SShri Abhyankar jj++; 181786d161a7SShri Abhyankar } 181886d161a7SShri Abhyankar 181986d161a7SShri Abhyankar /* read in other processors and ship out */ 182086d161a7SShri Abhyankar for (i=1; i<size; i++) { 182186d161a7SShri Abhyankar nz = procsnz[i]; 182286d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 182386d161a7SShri Abhyankar ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 182486d161a7SShri Abhyankar } 182586d161a7SShri Abhyankar ierr = PetscFree(procsnz);CHKERRQ(ierr); 182686d161a7SShri Abhyankar } else { 182786d161a7SShri Abhyankar /* receive numeric values */ 1828854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 182986d161a7SShri Abhyankar 183086d161a7SShri Abhyankar /* receive message of values*/ 183186d161a7SShri Abhyankar ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 183286d161a7SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 183386d161a7SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 183486d161a7SShri Abhyankar 183586d161a7SShri Abhyankar /* insert into matrix */ 183686d161a7SShri Abhyankar jj = rstart; 183786d161a7SShri Abhyankar smycols = mycols; 183886d161a7SShri Abhyankar svals = vals; 183986d161a7SShri Abhyankar for (i=0; i<m; i++) { 184086d161a7SShri Abhyankar ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 184186d161a7SShri Abhyankar smycols += ourlens[i]; 184286d161a7SShri Abhyankar svals += ourlens[i]; 184386d161a7SShri Abhyankar jj++; 184486d161a7SShri Abhyankar } 184586d161a7SShri Abhyankar } 184649467e7dSBarry Smith ierr = PetscFree(ourlens);CHKERRQ(ierr); 184786d161a7SShri Abhyankar ierr = PetscFree(vals);CHKERRQ(ierr); 184886d161a7SShri Abhyankar ierr = PetscFree(mycols);CHKERRQ(ierr); 184986d161a7SShri Abhyankar ierr = PetscFree(rowners);CHKERRQ(ierr); 185086d161a7SShri Abhyankar 185186d161a7SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 185286d161a7SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 185386d161a7SShri Abhyankar PetscFunctionReturn(0); 185486d161a7SShri Abhyankar } 185586d161a7SShri Abhyankar 1856ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 18576e4ee0c6SHong Zhang { 18586e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 18596e4ee0c6SHong Zhang Mat a,b; 1860ace3abfcSBarry Smith PetscBool flg; 18616e4ee0c6SHong Zhang PetscErrorCode ierr; 186290ace30eSBarry Smith 18636e4ee0c6SHong Zhang PetscFunctionBegin; 18646e4ee0c6SHong Zhang a = matA->A; 18656e4ee0c6SHong Zhang b = matB->A; 18666e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1867b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 18686e4ee0c6SHong Zhang PetscFunctionReturn(0); 18696e4ee0c6SHong Zhang } 187090ace30eSBarry Smith 1871baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1872baa3c1c6SHong Zhang { 1873baa3c1c6SHong Zhang PetscErrorCode ierr; 1874baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1875baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = a->atbdense; 1876baa3c1c6SHong Zhang 1877baa3c1c6SHong Zhang PetscFunctionBegin; 1878c5ef1628SHong Zhang ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1879baa3c1c6SHong Zhang ierr = (atb->destroy)(A);CHKERRQ(ierr); 1880baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 1881baa3c1c6SHong Zhang PetscFunctionReturn(0); 1882baa3c1c6SHong Zhang } 1883baa3c1c6SHong Zhang 1884cc48ffa7SToby Isaac PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A) 1885cc48ffa7SToby Isaac { 1886cc48ffa7SToby Isaac PetscErrorCode ierr; 1887cc48ffa7SToby Isaac Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1888cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = a->abtdense; 1889cc48ffa7SToby Isaac 1890cc48ffa7SToby Isaac PetscFunctionBegin; 1891cc48ffa7SToby Isaac ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr); 1892faa55883SToby Isaac ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr); 1893cc48ffa7SToby Isaac ierr = (abt->destroy)(A);CHKERRQ(ierr); 1894cc48ffa7SToby Isaac ierr = PetscFree(abt);CHKERRQ(ierr); 1895cc48ffa7SToby Isaac PetscFunctionReturn(0); 1896cc48ffa7SToby Isaac } 1897cc48ffa7SToby Isaac 1898cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1899cb20be35SHong Zhang { 1900baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1901660d5466SHong Zhang Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1902baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = c->atbdense; 1903cb20be35SHong Zhang PetscErrorCode ierr; 1904cb20be35SHong Zhang MPI_Comm comm; 1905d5017740SHong Zhang PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1906c5ef1628SHong Zhang PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1907d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1908660d5466SHong Zhang PetscScalar _DOne=1.0,_DZero=0.0; 1909adc7a786SHong Zhang PetscBLASInt am,an,bn,aN; 1910e68c0b26SHong Zhang const PetscInt *ranges; 1911cb20be35SHong Zhang 1912cb20be35SHong Zhang PetscFunctionBegin; 1913cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1914cb20be35SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1915cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1916e68c0b26SHong Zhang 1917c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 1918660d5466SHong Zhang ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1919660d5466SHong Zhang ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1920660d5466SHong Zhang ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1921adc7a786SHong Zhang ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1922adc7a786SHong Zhang PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1923cb20be35SHong Zhang 1924cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1925c5ef1628SHong Zhang for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1926cb20be35SHong Zhang 1927660d5466SHong Zhang /* arrange atbarray into sendbuf */ 1928cb20be35SHong Zhang k = 0; 1929cb20be35SHong Zhang for (proc=0; proc<size; proc++) { 1930baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 1931c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1932cb20be35SHong Zhang } 1933cb20be35SHong Zhang } 1934c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 1935660d5466SHong Zhang ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 19363462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1937660d5466SHong Zhang ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1938cb20be35SHong Zhang PetscFunctionReturn(0); 1939cb20be35SHong Zhang } 1940cb20be35SHong Zhang 1941cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1942cb20be35SHong Zhang { 1943cb20be35SHong Zhang PetscErrorCode ierr; 1944baa3c1c6SHong Zhang Mat Cdense; 1945cb20be35SHong Zhang MPI_Comm comm; 1946baa3c1c6SHong Zhang PetscMPIInt size; 1947660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1948baa3c1c6SHong Zhang Mat_MPIDense *c; 1949baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 1950cb20be35SHong Zhang 1951cb20be35SHong Zhang PetscFunctionBegin; 1952baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1953cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1954cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1955cb20be35SHong Zhang } 1956cb20be35SHong Zhang 1957baa3c1c6SHong Zhang /* create matrix product Cdense */ 1958baa3c1c6SHong Zhang ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1959660d5466SHong Zhang ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1960baa3c1c6SHong Zhang ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1961baa3c1c6SHong Zhang ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1962baa3c1c6SHong Zhang ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963baa3c1c6SHong Zhang ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964baa3c1c6SHong Zhang *C = Cdense; 1965baa3c1c6SHong Zhang 1966baa3c1c6SHong Zhang /* create data structure for reuse Cdense */ 1967baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1968baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 1969660d5466SHong Zhang cM = Cdense->rmap->N; 1970c5ef1628SHong Zhang ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1971baa3c1c6SHong Zhang 1972baa3c1c6SHong Zhang c = (Mat_MPIDense*)Cdense->data; 1973baa3c1c6SHong Zhang c->atbdense = atb; 1974baa3c1c6SHong Zhang atb->destroy = Cdense->ops->destroy; 1975baa3c1c6SHong Zhang Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1976cb20be35SHong Zhang PetscFunctionReturn(0); 1977cb20be35SHong Zhang } 1978cb20be35SHong Zhang 1979cb20be35SHong Zhang PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1980cb20be35SHong Zhang { 1981cb20be35SHong Zhang PetscErrorCode ierr; 1982cb20be35SHong Zhang 1983cb20be35SHong Zhang PetscFunctionBegin; 1984cb20be35SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 1985cb20be35SHong Zhang ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1986cb20be35SHong Zhang } 1987cb20be35SHong Zhang ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1988cb20be35SHong Zhang PetscFunctionReturn(0); 1989cb20be35SHong Zhang } 1990320f2790SHong Zhang 1991cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C) 1992cc48ffa7SToby Isaac { 1993cc48ffa7SToby Isaac PetscErrorCode ierr; 1994cc48ffa7SToby Isaac Mat Cdense; 1995cc48ffa7SToby Isaac MPI_Comm comm; 1996cc48ffa7SToby Isaac PetscMPIInt i, size; 1997cc48ffa7SToby Isaac PetscInt maxRows, bufsiz; 1998cc48ffa7SToby Isaac Mat_MPIDense *c; 1999cc48ffa7SToby Isaac PetscMPIInt tag; 2000faa55883SToby Isaac const char *algTypes[2] = {"allgatherv","cyclic"}; 2001faa55883SToby Isaac PetscInt alg, nalg = 2; 2002cc48ffa7SToby Isaac Mat_MatTransMultDense *abt; 2003cc48ffa7SToby Isaac 2004cc48ffa7SToby Isaac PetscFunctionBegin; 2005cc48ffa7SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2006faa55883SToby Isaac alg = 0; /* default is allgatherv */ 2007faa55883SToby Isaac ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2008faa55883SToby Isaac ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr); 2009faa55883SToby Isaac ierr = PetscOptionsEnd();CHKERRQ(ierr); 2010cc48ffa7SToby Isaac if (A->cmap->N != B->cmap->N) { 2011cc48ffa7SToby Isaac SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N); 2012cc48ffa7SToby Isaac } 2013cc48ffa7SToby Isaac 2014cc48ffa7SToby Isaac /* create matrix product Cdense */ 2015cc48ffa7SToby Isaac ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 2016cc48ffa7SToby Isaac ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2017cc48ffa7SToby Isaac ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 2018cc48ffa7SToby Isaac ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 2019cc48ffa7SToby Isaac ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr); 2020cc48ffa7SToby Isaac ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2021cc48ffa7SToby Isaac ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022cc48ffa7SToby Isaac *C = Cdense; 2023cc48ffa7SToby Isaac 2024cc48ffa7SToby Isaac /* create data structure for reuse Cdense */ 2025cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2026cc48ffa7SToby Isaac ierr = PetscNew(&abt);CHKERRQ(ierr); 2027cc48ffa7SToby Isaac abt->tag = tag; 2028faa55883SToby Isaac abt->alg = alg; 2029faa55883SToby Isaac switch (alg) { 2030faa55883SToby Isaac case 1: 2031cc48ffa7SToby Isaac for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2032cc48ffa7SToby Isaac bufsiz = A->cmap->N * maxRows; 2033cc48ffa7SToby Isaac ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2034faa55883SToby Isaac break; 2035faa55883SToby Isaac default: 2036faa55883SToby Isaac ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2037faa55883SToby Isaac ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2038faa55883SToby Isaac for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2039faa55883SToby Isaac for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2040faa55883SToby Isaac break; 2041faa55883SToby Isaac } 2042cc48ffa7SToby Isaac 2043cc48ffa7SToby Isaac c = (Mat_MPIDense*)Cdense->data; 2044cc48ffa7SToby Isaac c->abtdense = abt; 2045cc48ffa7SToby Isaac abt->destroy = Cdense->ops->destroy; 2046cc48ffa7SToby Isaac Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2047cc48ffa7SToby Isaac PetscFunctionReturn(0); 2048cc48ffa7SToby Isaac } 2049cc48ffa7SToby Isaac 2050faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2051cc48ffa7SToby Isaac { 2052cc48ffa7SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2053cc48ffa7SToby Isaac Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2054cc48ffa7SToby Isaac Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2055cc48ffa7SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2056cc48ffa7SToby Isaac PetscErrorCode ierr; 2057cc48ffa7SToby Isaac MPI_Comm comm; 2058cc48ffa7SToby Isaac PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2059ce36caf3SSatish Balay PetscScalar *sendbuf, *recvbuf=0, *carray; 2060cc48ffa7SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2061cc48ffa7SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2062cc48ffa7SToby Isaac PetscBLASInt cm, cn, ck; 2063cc48ffa7SToby Isaac MPI_Request reqs[2]; 2064cc48ffa7SToby Isaac const PetscInt *ranges; 2065cc48ffa7SToby Isaac 2066cc48ffa7SToby Isaac PetscFunctionBegin; 2067cc48ffa7SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2068cc48ffa7SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2069cc48ffa7SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2070cc48ffa7SToby Isaac 2071cc48ffa7SToby Isaac ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2072cc48ffa7SToby Isaac bn = B->rmap->n; 2073cc48ffa7SToby Isaac if (bseq->lda == bn) { 2074cc48ffa7SToby Isaac sendbuf = bseq->v; 2075cc48ffa7SToby Isaac } else { 2076cc48ffa7SToby Isaac sendbuf = abt->buf[0]; 2077cc48ffa7SToby Isaac for (k = 0, i = 0; i < cK; i++) { 2078cc48ffa7SToby Isaac for (j = 0; j < bn; j++, k++) { 2079cc48ffa7SToby Isaac sendbuf[k] = bseq->v[i * bseq->lda + j]; 2080cc48ffa7SToby Isaac } 2081cc48ffa7SToby Isaac } 2082cc48ffa7SToby Isaac } 2083cc48ffa7SToby Isaac if (size > 1) { 2084cc48ffa7SToby Isaac sendto = (rank + size - 1) % size; 2085cc48ffa7SToby Isaac recvfrom = (rank + size + 1) % size; 2086cc48ffa7SToby Isaac } else { 2087cc48ffa7SToby Isaac sendto = recvfrom = 0; 2088cc48ffa7SToby Isaac } 2089cc48ffa7SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2090cc48ffa7SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2091cc48ffa7SToby Isaac recvisfrom = rank; 2092cc48ffa7SToby Isaac for (i = 0; i < size; i++) { 2093cc48ffa7SToby Isaac /* we have finished receiving in sending, bufs can be read/modified */ 2094cc48ffa7SToby Isaac PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2095cc48ffa7SToby Isaac PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2096cc48ffa7SToby Isaac 2097cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2098cc48ffa7SToby Isaac /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2099cc48ffa7SToby Isaac sendsiz = cK * bn; 2100cc48ffa7SToby Isaac recvsiz = cK * nextbn; 2101cc48ffa7SToby Isaac recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2102cc48ffa7SToby Isaac ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2103cc48ffa7SToby Isaac ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2104cc48ffa7SToby Isaac } 2105cc48ffa7SToby Isaac 2106cc48ffa7SToby Isaac /* local aseq * sendbuf^T */ 2107cc48ffa7SToby Isaac ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2108cc48ffa7SToby Isaac carray = &cseq->v[cseq->lda * ranges[recvisfrom]]; 2109cc48ffa7SToby Isaac PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda)); 2110cc48ffa7SToby Isaac 2111cc48ffa7SToby Isaac if (nextrecvisfrom != rank) { 2112cc48ffa7SToby Isaac /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2113cc48ffa7SToby Isaac ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2114cc48ffa7SToby Isaac } 2115cc48ffa7SToby Isaac bn = nextbn; 2116cc48ffa7SToby Isaac recvisfrom = nextrecvisfrom; 2117cc48ffa7SToby Isaac sendbuf = recvbuf; 2118cc48ffa7SToby Isaac } 2119cc48ffa7SToby Isaac PetscFunctionReturn(0); 2120cc48ffa7SToby Isaac } 2121cc48ffa7SToby Isaac 2122faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2123faa55883SToby Isaac { 2124faa55883SToby Isaac Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2125faa55883SToby Isaac Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 2126faa55883SToby Isaac Mat_SeqDense *cseq=(Mat_SeqDense*)(c->A)->data; 2127faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2128faa55883SToby Isaac PetscErrorCode ierr; 2129faa55883SToby Isaac MPI_Comm comm; 2130faa55883SToby Isaac PetscMPIInt rank,size; 2131faa55883SToby Isaac PetscScalar *sendbuf, *recvbuf; 2132faa55883SToby Isaac PetscInt i,cK=A->cmap->N,k,j,bn; 2133faa55883SToby Isaac PetscScalar _DOne=1.0,_DZero=0.0; 2134faa55883SToby Isaac PetscBLASInt cm, cn, ck; 2135faa55883SToby Isaac 2136faa55883SToby Isaac PetscFunctionBegin; 2137faa55883SToby Isaac ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2138faa55883SToby Isaac ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2139faa55883SToby Isaac ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2140faa55883SToby Isaac 2141faa55883SToby Isaac /* copy transpose of B into buf[0] */ 2142faa55883SToby Isaac bn = B->rmap->n; 2143faa55883SToby Isaac sendbuf = abt->buf[0]; 2144faa55883SToby Isaac recvbuf = abt->buf[1]; 2145faa55883SToby Isaac for (k = 0, j = 0; j < bn; j++) { 2146faa55883SToby Isaac for (i = 0; i < cK; i++, k++) { 2147faa55883SToby Isaac sendbuf[k] = bseq->v[i * bseq->lda + j]; 2148faa55883SToby Isaac } 2149faa55883SToby Isaac } 2150faa55883SToby Isaac ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2151faa55883SToby Isaac ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2152faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2153faa55883SToby Isaac ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2154faa55883SToby Isaac PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda)); 2155faa55883SToby Isaac PetscFunctionReturn(0); 2156faa55883SToby Isaac } 2157faa55883SToby Isaac 2158faa55883SToby Isaac static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2159faa55883SToby Isaac { 2160faa55883SToby Isaac Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2161faa55883SToby Isaac Mat_MatTransMultDense *abt = c->abtdense; 2162faa55883SToby Isaac PetscErrorCode ierr; 2163faa55883SToby Isaac 2164faa55883SToby Isaac PetscFunctionBegin; 2165faa55883SToby Isaac switch (abt->alg) { 2166faa55883SToby Isaac case 1: 2167faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2168faa55883SToby Isaac break; 2169faa55883SToby Isaac default: 2170faa55883SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2171faa55883SToby Isaac break; 2172faa55883SToby Isaac } 2173faa55883SToby Isaac PetscFunctionReturn(0); 2174faa55883SToby Isaac } 2175faa55883SToby Isaac 2176cc48ffa7SToby Isaac static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C) 2177cc48ffa7SToby Isaac { 2178cc48ffa7SToby Isaac PetscErrorCode ierr; 2179cc48ffa7SToby Isaac 2180cc48ffa7SToby Isaac PetscFunctionBegin; 2181cc48ffa7SToby Isaac if (scall == MAT_INITIAL_MATRIX) { 2182cc48ffa7SToby Isaac ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 2183cc48ffa7SToby Isaac } 2184cc48ffa7SToby Isaac ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 2185cc48ffa7SToby Isaac PetscFunctionReturn(0); 2186cc48ffa7SToby Isaac } 2187cc48ffa7SToby Isaac 2188320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 2189320f2790SHong Zhang { 2190320f2790SHong Zhang PetscErrorCode ierr; 2191320f2790SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 2192320f2790SHong Zhang Mat_MatMultDense *ab = a->abdense; 2193320f2790SHong Zhang 2194320f2790SHong Zhang PetscFunctionBegin; 2195320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2196320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2197320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2198320f2790SHong Zhang 2199320f2790SHong Zhang ierr = (ab->destroy)(A);CHKERRQ(ierr); 2200320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 2201320f2790SHong Zhang PetscFunctionReturn(0); 2202320f2790SHong Zhang } 2203320f2790SHong Zhang 22045242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 2205320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2206320f2790SHong Zhang { 2207320f2790SHong Zhang PetscErrorCode ierr; 2208320f2790SHong Zhang Mat_MPIDense *c=(Mat_MPIDense*)C->data; 2209320f2790SHong Zhang Mat_MatMultDense *ab=c->abdense; 2210320f2790SHong Zhang 2211320f2790SHong Zhang PetscFunctionBegin; 2212de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2213de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2214320f2790SHong Zhang ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2215de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2216320f2790SHong Zhang PetscFunctionReturn(0); 2217320f2790SHong Zhang } 2218320f2790SHong Zhang 2219320f2790SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 2220320f2790SHong Zhang { 2221320f2790SHong Zhang PetscErrorCode ierr; 2222320f2790SHong Zhang Mat Ae,Be,Ce; 2223320f2790SHong Zhang Mat_MPIDense *c; 2224320f2790SHong Zhang Mat_MatMultDense *ab; 2225320f2790SHong Zhang 2226320f2790SHong Zhang PetscFunctionBegin; 2227320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2228378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2229320f2790SHong Zhang } 2230320f2790SHong Zhang 2231320f2790SHong Zhang /* convert A and B to Elemental matrices Ae and Be */ 2232320f2790SHong Zhang ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 2233320f2790SHong Zhang ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 2234320f2790SHong Zhang 2235320f2790SHong Zhang /* Ce = Ae*Be */ 2236320f2790SHong Zhang ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 2237320f2790SHong Zhang ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 2238320f2790SHong Zhang 2239320f2790SHong Zhang /* convert Ce to C */ 2240320f2790SHong Zhang ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 2241320f2790SHong Zhang 2242320f2790SHong Zhang /* create data structure for reuse Cdense */ 2243320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 2244320f2790SHong Zhang c = (Mat_MPIDense*)(*C)->data; 2245320f2790SHong Zhang c->abdense = ab; 2246320f2790SHong Zhang 2247320f2790SHong Zhang ab->Ae = Ae; 2248320f2790SHong Zhang ab->Be = Be; 2249320f2790SHong Zhang ab->Ce = Ce; 2250320f2790SHong Zhang ab->destroy = (*C)->ops->destroy; 2251320f2790SHong Zhang (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 22529775878aSHong Zhang (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2253320f2790SHong Zhang PetscFunctionReturn(0); 2254320f2790SHong Zhang } 2255320f2790SHong Zhang 2256150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 2257320f2790SHong Zhang { 2258320f2790SHong Zhang PetscErrorCode ierr; 2259320f2790SHong Zhang 2260320f2790SHong Zhang PetscFunctionBegin; 2261320f2790SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 226257299f2fSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2263320f2790SHong Zhang ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 226457299f2fSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 2265320f2790SHong Zhang } else { 226657299f2fSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2267320f2790SHong Zhang ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 226857299f2fSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 2269320f2790SHong Zhang } 2270320f2790SHong Zhang PetscFunctionReturn(0); 2271320f2790SHong Zhang } 22725242a7b1SHong Zhang #endif 227386aefd0dSHong Zhang 2274