1be1d678aSKris Buschelman 2ed3cc1f0SBarry Smith /* 3ed3cc1f0SBarry Smith Basic functions for basic parallel dense matrices. 4ed3cc1f0SBarry Smith */ 5ed3cc1f0SBarry Smith 604fea9ffSBarry Smith 7c6db04a5SJed Brown #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 88949adfdSHong Zhang #include <../src/mat/impls/aij/mpi/mpiaij.h> 9baa3c1c6SHong Zhang #include <petscblaslapack.h> 108965ea79SLois Curfman McInnes 11ab92ecdeSBarry Smith /*@ 12ab92ecdeSBarry Smith 13ab92ecdeSBarry Smith MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 14ab92ecdeSBarry Smith matrix that represents the operator. For sequential matrices it returns itself. 15ab92ecdeSBarry Smith 16ab92ecdeSBarry Smith Input Parameter: 17ab92ecdeSBarry Smith . A - the Seq or MPI dense matrix 18ab92ecdeSBarry Smith 19ab92ecdeSBarry Smith Output Parameter: 20ab92ecdeSBarry Smith . B - the inner matrix 21ab92ecdeSBarry Smith 228e6c10adSSatish Balay Level: intermediate 238e6c10adSSatish Balay 24ab92ecdeSBarry Smith @*/ 25ab92ecdeSBarry Smith PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 26ab92ecdeSBarry Smith { 27ab92ecdeSBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 28ab92ecdeSBarry Smith PetscErrorCode ierr; 29ace3abfcSBarry Smith PetscBool flg; 30ab92ecdeSBarry Smith 31ab92ecdeSBarry Smith PetscFunctionBegin; 32251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 332205254eSKarl Rupp if (flg) *B = mat->A; 342205254eSKarl Rupp else *B = A; 35ab92ecdeSBarry Smith PetscFunctionReturn(0); 36ab92ecdeSBarry Smith } 37ab92ecdeSBarry Smith 38ba8c8a56SBarry Smith PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 39ba8c8a56SBarry Smith { 40ba8c8a56SBarry Smith Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 41ba8c8a56SBarry Smith PetscErrorCode ierr; 42d0f46423SBarry Smith PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 43ba8c8a56SBarry Smith 44ba8c8a56SBarry Smith PetscFunctionBegin; 45e7e72b3dSBarry Smith if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 46ba8c8a56SBarry Smith lrow = row - rstart; 47ba8c8a56SBarry Smith ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 48ba8c8a56SBarry Smith PetscFunctionReturn(0); 49ba8c8a56SBarry Smith } 50ba8c8a56SBarry Smith 51ba8c8a56SBarry Smith PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 52ba8c8a56SBarry Smith { 53ba8c8a56SBarry Smith PetscErrorCode ierr; 54ba8c8a56SBarry Smith 55ba8c8a56SBarry Smith PetscFunctionBegin; 56ba8c8a56SBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 57ba8c8a56SBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 58ba8c8a56SBarry Smith PetscFunctionReturn(0); 59ba8c8a56SBarry Smith } 60ba8c8a56SBarry Smith 6111bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 620de54da6SSatish Balay { 630de54da6SSatish Balay Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 646849ba73SBarry Smith PetscErrorCode ierr; 65d0f46423SBarry Smith PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 6687828ca2SBarry Smith PetscScalar *array; 670de54da6SSatish Balay MPI_Comm comm; 6811bd1e4dSLisandro Dalcin Mat B; 690de54da6SSatish Balay 700de54da6SSatish Balay PetscFunctionBegin; 71e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 720de54da6SSatish Balay 7311bd1e4dSLisandro Dalcin ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 7411bd1e4dSLisandro Dalcin if (!B) { 750de54da6SSatish Balay ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 7611bd1e4dSLisandro Dalcin ierr = MatCreate(comm,&B);CHKERRQ(ierr); 7711bd1e4dSLisandro Dalcin ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 7811bd1e4dSLisandro Dalcin ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 798c778c55SBarry Smith ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr); 8011bd1e4dSLisandro Dalcin ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 818c778c55SBarry Smith ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr); 8211bd1e4dSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8311bd1e4dSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 8411bd1e4dSLisandro Dalcin ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 8511bd1e4dSLisandro Dalcin *a = B; 8611bd1e4dSLisandro Dalcin ierr = MatDestroy(&B);CHKERRQ(ierr); 872205254eSKarl Rupp } else *a = B; 880de54da6SSatish Balay PetscFunctionReturn(0); 890de54da6SSatish Balay } 900de54da6SSatish Balay 9113f74950SBarry Smith PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 928965ea79SLois Curfman McInnes { 9339b7565bSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 94dfbe8321SBarry Smith PetscErrorCode ierr; 95d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 96ace3abfcSBarry Smith PetscBool roworiented = A->roworiented; 978965ea79SLois Curfman McInnes 983a40ed3dSBarry Smith PetscFunctionBegin; 998965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 1005ef9f2a5SBarry Smith if (idxm[i] < 0) continue; 101e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 1028965ea79SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 1038965ea79SLois Curfman McInnes row = idxm[i] - rstart; 10439b7565bSBarry Smith if (roworiented) { 10539b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 1063a40ed3dSBarry Smith } else { 1078965ea79SLois Curfman McInnes for (j=0; j<n; j++) { 1085ef9f2a5SBarry Smith if (idxn[j] < 0) continue; 109e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 11039b7565bSBarry Smith ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 11139b7565bSBarry Smith } 1128965ea79SLois Curfman McInnes } 1132205254eSKarl Rupp } else if (!A->donotstash) { 1145080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 11539b7565bSBarry Smith if (roworiented) { 116b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 117d36fbae8SSatish Balay } else { 118b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 11939b7565bSBarry Smith } 120b49de8d1SLois Curfman McInnes } 121b49de8d1SLois Curfman McInnes } 1223a40ed3dSBarry Smith PetscFunctionReturn(0); 123b49de8d1SLois Curfman McInnes } 124b49de8d1SLois Curfman McInnes 12513f74950SBarry Smith PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 126b49de8d1SLois Curfman McInnes { 127b49de8d1SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 128dfbe8321SBarry Smith PetscErrorCode ierr; 129d0f46423SBarry Smith PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 130b49de8d1SLois Curfman McInnes 1313a40ed3dSBarry Smith PetscFunctionBegin; 132b49de8d1SLois Curfman McInnes for (i=0; i<m; i++) { 133e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 134e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135b49de8d1SLois Curfman McInnes if (idxm[i] >= rstart && idxm[i] < rend) { 136b49de8d1SLois Curfman McInnes row = idxm[i] - rstart; 137b49de8d1SLois Curfman McInnes for (j=0; j<n; j++) { 138e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 139e7e72b3dSBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 140b49de8d1SLois Curfman McInnes ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 141b49de8d1SLois Curfman McInnes } 142e7e72b3dSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 1438965ea79SLois Curfman McInnes } 1443a40ed3dSBarry Smith PetscFunctionReturn(0); 1458965ea79SLois Curfman McInnes } 1468965ea79SLois Curfman McInnes 1478c778c55SBarry Smith PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[]) 148ff14e315SSatish Balay { 149ff14e315SSatish Balay Mat_MPIDense *a = (Mat_MPIDense*)A->data; 150dfbe8321SBarry Smith PetscErrorCode ierr; 151ff14e315SSatish Balay 1523a40ed3dSBarry Smith PetscFunctionBegin; 1538c778c55SBarry Smith ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 1543a40ed3dSBarry Smith PetscFunctionReturn(0); 155ff14e315SSatish Balay } 156ff14e315SSatish Balay 157*7dae84e0SHong Zhang static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 158ca3fa75bSLois Curfman McInnes { 159ca3fa75bSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 160ca3fa75bSLois Curfman McInnes Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data; 1616849ba73SBarry Smith PetscErrorCode ierr; 1624aa3045dSJed Brown PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 1635d0c19d7SBarry Smith const PetscInt *irow,*icol; 16487828ca2SBarry Smith PetscScalar *av,*bv,*v = lmat->v; 165ca3fa75bSLois Curfman McInnes Mat newmat; 1664aa3045dSJed Brown IS iscol_local; 167ca3fa75bSLois Curfman McInnes 168ca3fa75bSLois Curfman McInnes PetscFunctionBegin; 1694aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 170ca3fa75bSLois Curfman McInnes ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 1714aa3045dSJed Brown ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 172b9b97703SBarry Smith ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 173b9b97703SBarry Smith ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 1744aa3045dSJed Brown ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 175ca3fa75bSLois Curfman McInnes 176ca3fa75bSLois Curfman McInnes /* No parallel redistribution currently supported! Should really check each index set 1777eba5e9cSLois Curfman McInnes to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 1787eba5e9cSLois Curfman McInnes original matrix! */ 179ca3fa75bSLois Curfman McInnes 180ca3fa75bSLois Curfman McInnes ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 1817eba5e9cSLois Curfman McInnes ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 182ca3fa75bSLois Curfman McInnes 183ca3fa75bSLois Curfman McInnes /* Check submatrix call */ 184ca3fa75bSLois Curfman McInnes if (scall == MAT_REUSE_MATRIX) { 185e32f2f54SBarry Smith /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 1867eba5e9cSLois Curfman McInnes /* Really need to test rows and column sizes! */ 187ca3fa75bSLois Curfman McInnes newmat = *B; 188ca3fa75bSLois Curfman McInnes } else { 189ca3fa75bSLois Curfman McInnes /* Create and fill new matrix */ 190ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 1914aa3045dSJed Brown ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 1927adad957SLisandro Dalcin ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1930298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 194ca3fa75bSLois Curfman McInnes } 195ca3fa75bSLois Curfman McInnes 196ca3fa75bSLois Curfman McInnes /* Now extract the data pointers and do the copy, column at a time */ 197ca3fa75bSLois Curfman McInnes newmatd = (Mat_MPIDense*)newmat->data; 198ca3fa75bSLois Curfman McInnes bv = ((Mat_SeqDense*)newmatd->A->data)->v; 199ca3fa75bSLois Curfman McInnes 2004aa3045dSJed Brown for (i=0; i<Ncols; i++) { 20125a33276SHong Zhang av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i]; 202ca3fa75bSLois Curfman McInnes for (j=0; j<nrows; j++) { 2037eba5e9cSLois Curfman McInnes *bv++ = av[irow[j] - rstart]; 204ca3fa75bSLois Curfman McInnes } 205ca3fa75bSLois Curfman McInnes } 206ca3fa75bSLois Curfman McInnes 207ca3fa75bSLois Curfman McInnes /* Assemble the matrices so that the correct flags are set */ 208ca3fa75bSLois Curfman McInnes ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 209ca3fa75bSLois Curfman McInnes ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 210ca3fa75bSLois Curfman McInnes 211ca3fa75bSLois Curfman McInnes /* Free work space */ 212ca3fa75bSLois Curfman McInnes ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 2135bdf786aSShri Abhyankar ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 21432bb1f2dSLisandro Dalcin ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 215ca3fa75bSLois Curfman McInnes *B = newmat; 216ca3fa75bSLois Curfman McInnes PetscFunctionReturn(0); 217ca3fa75bSLois Curfman McInnes } 218ca3fa75bSLois Curfman McInnes 2198c778c55SBarry Smith PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[]) 220ff14e315SSatish Balay { 22173a71a0fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)A->data; 22273a71a0fSBarry Smith PetscErrorCode ierr; 22373a71a0fSBarry Smith 2243a40ed3dSBarry Smith PetscFunctionBegin; 2258c778c55SBarry Smith ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 2263a40ed3dSBarry Smith PetscFunctionReturn(0); 227ff14e315SSatish Balay } 228ff14e315SSatish Balay 229dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 2308965ea79SLois Curfman McInnes { 23139ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 232ce94432eSBarry Smith MPI_Comm comm; 233dfbe8321SBarry Smith PetscErrorCode ierr; 23413f74950SBarry Smith PetscInt nstash,reallocs; 2358965ea79SLois Curfman McInnes InsertMode addv; 2368965ea79SLois Curfman McInnes 2373a40ed3dSBarry Smith PetscFunctionBegin; 238ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2398965ea79SLois Curfman McInnes /* make sure all processors are either in INSERTMODE or ADDMODE */ 240b2566f29SBarry Smith ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr); 241ce94432eSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs"); 242e0fa3b82SLois Curfman McInnes mat->insertmode = addv; /* in case this processor had no cache */ 2438965ea79SLois Curfman McInnes 244d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 2458798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 246ae15b995SBarry Smith ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 2473a40ed3dSBarry Smith PetscFunctionReturn(0); 2488965ea79SLois Curfman McInnes } 2498965ea79SLois Curfman McInnes 250dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 2518965ea79SLois Curfman McInnes { 25239ddd567SLois Curfman McInnes Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 2536849ba73SBarry Smith PetscErrorCode ierr; 25413f74950SBarry Smith PetscInt i,*row,*col,flg,j,rstart,ncols; 25513f74950SBarry Smith PetscMPIInt n; 25687828ca2SBarry Smith PetscScalar *val; 2578965ea79SLois Curfman McInnes 2583a40ed3dSBarry Smith PetscFunctionBegin; 2598965ea79SLois Curfman McInnes /* wait on receives */ 2607ef1d9bdSSatish Balay while (1) { 2618798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 2627ef1d9bdSSatish Balay if (!flg) break; 2638965ea79SLois Curfman McInnes 2647ef1d9bdSSatish Balay for (i=0; i<n;) { 2657ef1d9bdSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 2662205254eSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 2672205254eSKarl Rupp if (row[j] != rstart) break; 2682205254eSKarl Rupp } 2697ef1d9bdSSatish Balay if (j < n) ncols = j-i; 2707ef1d9bdSSatish Balay else ncols = n-i; 2717ef1d9bdSSatish Balay /* Now assemble all these values with a single function call */ 2724b4eb8d3SJed Brown ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 2737ef1d9bdSSatish Balay i = j; 2748965ea79SLois Curfman McInnes } 2757ef1d9bdSSatish Balay } 2768798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 2778965ea79SLois Curfman McInnes 27839ddd567SLois Curfman McInnes ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 27939ddd567SLois Curfman McInnes ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 2808965ea79SLois Curfman McInnes 2816d4a8577SBarry Smith if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 28239ddd567SLois Curfman McInnes ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 2838965ea79SLois Curfman McInnes } 2843a40ed3dSBarry Smith PetscFunctionReturn(0); 2858965ea79SLois Curfman McInnes } 2868965ea79SLois Curfman McInnes 287dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIDense(Mat A) 2888965ea79SLois Curfman McInnes { 289dfbe8321SBarry Smith PetscErrorCode ierr; 29039ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 2913a40ed3dSBarry Smith 2923a40ed3dSBarry Smith PetscFunctionBegin; 2933a40ed3dSBarry Smith ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 2943a40ed3dSBarry Smith PetscFunctionReturn(0); 2958965ea79SLois Curfman McInnes } 2968965ea79SLois Curfman McInnes 2978965ea79SLois Curfman McInnes /* the code does not do the diagonal entries correctly unless the 2988965ea79SLois Curfman McInnes matrix is square and the column and row owerships are identical. 2998965ea79SLois Curfman McInnes This is a BUG. The only way to fix it seems to be to access 3003501a2bdSLois Curfman McInnes mdn->A and mdn->B directly and not through the MatZeroRows() 3018965ea79SLois Curfman McInnes routine. 3028965ea79SLois Curfman McInnes */ 3032b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 3048965ea79SLois Curfman McInnes { 30539ddd567SLois Curfman McInnes Mat_MPIDense *l = (Mat_MPIDense*)A->data; 3066849ba73SBarry Smith PetscErrorCode ierr; 307d0f46423SBarry Smith PetscInt i,*owners = A->rmap->range; 30876ec1555SBarry Smith PetscInt *sizes,j,idx,nsends; 30913f74950SBarry Smith PetscInt nmax,*svalues,*starts,*owner,nrecvs; 3107adad957SLisandro Dalcin PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source; 31113f74950SBarry Smith PetscInt *lens,*lrows,*values; 31213f74950SBarry Smith PetscMPIInt n,imdex,rank = l->rank,size = l->size; 313ce94432eSBarry Smith MPI_Comm comm; 3148965ea79SLois Curfman McInnes MPI_Request *send_waits,*recv_waits; 3158965ea79SLois Curfman McInnes MPI_Status recv_status,*send_status; 316ace3abfcSBarry Smith PetscBool found; 31797b48c8fSBarry Smith const PetscScalar *xx; 31897b48c8fSBarry Smith PetscScalar *bb; 3198965ea79SLois Curfman McInnes 3203a40ed3dSBarry Smith PetscFunctionBegin; 321ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 322ce94432eSBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices"); 323b9679d65SBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership"); 3248965ea79SLois Curfman McInnes /* first count number of contributors to each processor */ 325f628708eSJed Brown ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr); 326037dbc42SBarry Smith ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/ 3278965ea79SLois Curfman McInnes for (i=0; i<N; i++) { 3288965ea79SLois Curfman McInnes idx = rows[i]; 32935d8aa7fSBarry Smith found = PETSC_FALSE; 3308965ea79SLois Curfman McInnes for (j=0; j<size; j++) { 3318965ea79SLois Curfman McInnes if (idx >= owners[j] && idx < owners[j+1]) { 33276ec1555SBarry Smith sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 3338965ea79SLois Curfman McInnes } 3348965ea79SLois Curfman McInnes } 335e32f2f54SBarry Smith if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 3368965ea79SLois Curfman McInnes } 3372205254eSKarl Rupp nsends = 0; 33876ec1555SBarry Smith for (i=0; i<size; i++) nsends += sizes[2*i+1]; 3398965ea79SLois Curfman McInnes 3408965ea79SLois Curfman McInnes /* inform other processors of number of messages and max length*/ 34176ec1555SBarry Smith ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr); 3428965ea79SLois Curfman McInnes 3438965ea79SLois Curfman McInnes /* post receives: */ 344785e854fSJed Brown ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr); 345854ce69bSBarry Smith ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr); 3468965ea79SLois Curfman McInnes for (i=0; i<nrecvs; i++) { 34713f74950SBarry Smith ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 3488965ea79SLois Curfman McInnes } 3498965ea79SLois Curfman McInnes 3508965ea79SLois Curfman McInnes /* do sends: 3518965ea79SLois Curfman McInnes 1) starts[i] gives the starting index in svalues for stuff going to 3528965ea79SLois Curfman McInnes the ith processor 3538965ea79SLois Curfman McInnes */ 354854ce69bSBarry Smith ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr); 355854ce69bSBarry Smith ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr); 356854ce69bSBarry Smith ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr); 3578965ea79SLois Curfman McInnes 3588965ea79SLois Curfman McInnes starts[0] = 0; 35976ec1555SBarry Smith for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 3602205254eSKarl Rupp for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 3612205254eSKarl Rupp 3622205254eSKarl Rupp starts[0] = 0; 36376ec1555SBarry Smith for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2]; 3648965ea79SLois Curfman McInnes count = 0; 3658965ea79SLois Curfman McInnes for (i=0; i<size; i++) { 36676ec1555SBarry Smith if (sizes[2*i+1]) { 36776ec1555SBarry Smith ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 3688965ea79SLois Curfman McInnes } 3698965ea79SLois Curfman McInnes } 370606d414cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 3718965ea79SLois Curfman McInnes 3728965ea79SLois Curfman McInnes base = owners[rank]; 3738965ea79SLois Curfman McInnes 3748965ea79SLois Curfman McInnes /* wait on receives */ 375dcca6d9dSJed Brown ierr = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr); 37674ed9c26SBarry Smith count = nrecvs; 37774ed9c26SBarry Smith slen = 0; 3788965ea79SLois Curfman McInnes while (count) { 379ca161407SBarry Smith ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 3808965ea79SLois Curfman McInnes /* unpack receives into our local space */ 38113f74950SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 3822205254eSKarl Rupp 3838965ea79SLois Curfman McInnes source[imdex] = recv_status.MPI_SOURCE; 3848965ea79SLois Curfman McInnes lens[imdex] = n; 3858965ea79SLois Curfman McInnes slen += n; 3868965ea79SLois Curfman McInnes count--; 3878965ea79SLois Curfman McInnes } 388606d414cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 3898965ea79SLois Curfman McInnes 3908965ea79SLois Curfman McInnes /* move the data into the send scatter */ 391854ce69bSBarry Smith ierr = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr); 3928965ea79SLois Curfman McInnes count = 0; 3938965ea79SLois Curfman McInnes for (i=0; i<nrecvs; i++) { 3948965ea79SLois Curfman McInnes values = rvalues + i*nmax; 3958965ea79SLois Curfman McInnes for (j=0; j<lens[i]; j++) { 3968965ea79SLois Curfman McInnes lrows[count++] = values[j] - base; 3978965ea79SLois Curfman McInnes } 3988965ea79SLois Curfman McInnes } 399606d414cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 40074ed9c26SBarry Smith ierr = PetscFree2(lens,source);CHKERRQ(ierr); 401606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 40276ec1555SBarry Smith ierr = PetscFree(sizes);CHKERRQ(ierr); 4038965ea79SLois Curfman McInnes 40497b48c8fSBarry Smith /* fix right hand side if needed */ 40597b48c8fSBarry Smith if (x && b) { 40697b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 40797b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 40897b48c8fSBarry Smith for (i=0; i<slen; i++) { 40997b48c8fSBarry Smith bb[lrows[i]] = diag*xx[lrows[i]]; 41097b48c8fSBarry Smith } 41197b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 41297b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 41397b48c8fSBarry Smith } 41497b48c8fSBarry Smith 4158965ea79SLois Curfman McInnes /* actually zap the local rows */ 416b9679d65SBarry Smith ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 417e2eb51b1SBarry Smith if (diag != 0.0) { 418b9679d65SBarry Smith Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data; 419b9679d65SBarry Smith PetscInt m = ll->lda, i; 420b9679d65SBarry Smith 421b9679d65SBarry Smith for (i=0; i<slen; i++) { 422b9679d65SBarry Smith ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; 423b9679d65SBarry Smith } 424b9679d65SBarry Smith } 425606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 4268965ea79SLois Curfman McInnes 4278965ea79SLois Curfman McInnes /* wait on sends */ 4288965ea79SLois Curfman McInnes if (nsends) { 429785e854fSJed Brown ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr); 430ca161407SBarry Smith ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 431606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 4328965ea79SLois Curfman McInnes } 433606d414cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 434606d414cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 4353a40ed3dSBarry Smith PetscFunctionReturn(0); 4368965ea79SLois Curfman McInnes } 4378965ea79SLois Curfman McInnes 438cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 439cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 440cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 441cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 442cc2e6a90SBarry Smith 443dfbe8321SBarry Smith PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 4448965ea79SLois Curfman McInnes { 44539ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 446dfbe8321SBarry Smith PetscErrorCode ierr; 447c456f294SBarry Smith 4483a40ed3dSBarry Smith PetscFunctionBegin; 449ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 450ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 451cc2e6a90SBarry Smith ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 4523a40ed3dSBarry Smith PetscFunctionReturn(0); 4538965ea79SLois Curfman McInnes } 4548965ea79SLois Curfman McInnes 455dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 4568965ea79SLois Curfman McInnes { 45739ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 458dfbe8321SBarry Smith PetscErrorCode ierr; 459c456f294SBarry Smith 4603a40ed3dSBarry Smith PetscFunctionBegin; 461ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 462ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 463cc2e6a90SBarry Smith ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 4643a40ed3dSBarry Smith PetscFunctionReturn(0); 4658965ea79SLois Curfman McInnes } 4668965ea79SLois Curfman McInnes 467dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 468096963f5SLois Curfman McInnes { 469096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 470dfbe8321SBarry Smith PetscErrorCode ierr; 47187828ca2SBarry Smith PetscScalar zero = 0.0; 472096963f5SLois Curfman McInnes 4733a40ed3dSBarry Smith PetscFunctionBegin; 4742dcb1b2aSMatthew Knepley ierr = VecSet(yy,zero);CHKERRQ(ierr); 475cc2e6a90SBarry Smith ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 476ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 477ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4783a40ed3dSBarry Smith PetscFunctionReturn(0); 479096963f5SLois Curfman McInnes } 480096963f5SLois Curfman McInnes 481dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 482096963f5SLois Curfman McInnes { 483096963f5SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 484dfbe8321SBarry Smith PetscErrorCode ierr; 485096963f5SLois Curfman McInnes 4863a40ed3dSBarry Smith PetscFunctionBegin; 4873501a2bdSLois Curfman McInnes ierr = VecCopy(yy,zz);CHKERRQ(ierr); 488cc2e6a90SBarry Smith ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr); 489ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 490ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4913a40ed3dSBarry Smith PetscFunctionReturn(0); 492096963f5SLois Curfman McInnes } 493096963f5SLois Curfman McInnes 494dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 4958965ea79SLois Curfman McInnes { 49639ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 497096963f5SLois Curfman McInnes Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data; 498dfbe8321SBarry Smith PetscErrorCode ierr; 499d0f46423SBarry Smith PetscInt len,i,n,m = A->rmap->n,radd; 50087828ca2SBarry Smith PetscScalar *x,zero = 0.0; 501ed3cc1f0SBarry Smith 5023a40ed3dSBarry Smith PetscFunctionBegin; 5032dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 5041ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 505096963f5SLois Curfman McInnes ierr = VecGetSize(v,&n);CHKERRQ(ierr); 506e32f2f54SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 507d0f46423SBarry Smith len = PetscMin(a->A->rmap->n,a->A->cmap->n); 508d0f46423SBarry Smith radd = A->rmap->rstart*m; 50944cd7ae7SLois Curfman McInnes for (i=0; i<len; i++) { 510096963f5SLois Curfman McInnes x[i] = aloc->v[radd + i*m + i]; 511096963f5SLois Curfman McInnes } 5121ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 5133a40ed3dSBarry Smith PetscFunctionReturn(0); 5148965ea79SLois Curfman McInnes } 5158965ea79SLois Curfman McInnes 516dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIDense(Mat mat) 5178965ea79SLois Curfman McInnes { 5183501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 519dfbe8321SBarry Smith PetscErrorCode ierr; 520ed3cc1f0SBarry Smith 5213a40ed3dSBarry Smith PetscFunctionBegin; 522aa482453SBarry Smith #if defined(PETSC_USE_LOG) 523d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 5248965ea79SLois Curfman McInnes #endif 5258798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 5266bf464f9SBarry Smith ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 5276bf464f9SBarry Smith ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 5286bf464f9SBarry Smith ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr); 52901b82886SBarry Smith 530bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 531dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 5328baccfbdSHong Zhang 5338baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 5348baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 5358baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 5368baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 5378baccfbdSHong Zhang #endif 538bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 539bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 540bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 541bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5428baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5438baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5448baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 5453a40ed3dSBarry Smith PetscFunctionReturn(0); 5468965ea79SLois Curfman McInnes } 54739ddd567SLois Curfman McInnes 5486849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer) 5498965ea79SLois Curfman McInnes { 55039ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 551dfbe8321SBarry Smith PetscErrorCode ierr; 552aa05aa95SBarry Smith PetscViewerFormat format; 553aa05aa95SBarry Smith int fd; 554d0f46423SBarry Smith PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k; 555aa05aa95SBarry Smith PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size; 556578230a0SSatish Balay PetscScalar *work,*v,*vv; 557aa05aa95SBarry Smith Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data; 5587056b6fcSBarry Smith 5593a40ed3dSBarry Smith PetscFunctionBegin; 56039ddd567SLois Curfman McInnes if (mdn->size == 1) { 56139ddd567SLois Curfman McInnes ierr = MatView(mdn->A,viewer);CHKERRQ(ierr); 562aa05aa95SBarry Smith } else { 563aa05aa95SBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 564ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 565ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 566aa05aa95SBarry Smith 567aa05aa95SBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 568f4403165SShri Abhyankar if (format == PETSC_VIEWER_NATIVE) { 569aa05aa95SBarry Smith 570aa05aa95SBarry Smith if (!rank) { 571aa05aa95SBarry Smith /* store the matrix as a dense matrix */ 5720700a824SBarry Smith header[0] = MAT_FILE_CLASSID; 573d0f46423SBarry Smith header[1] = mat->rmap->N; 574aa05aa95SBarry Smith header[2] = N; 575aa05aa95SBarry Smith header[3] = MATRIX_BINARY_FORMAT_DENSE; 576aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 577aa05aa95SBarry Smith 578aa05aa95SBarry Smith /* get largest work array needed for transposing array */ 579d0f46423SBarry Smith mmax = mat->rmap->n; 580aa05aa95SBarry Smith for (i=1; i<size; i++) { 581d0f46423SBarry Smith mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 5828965ea79SLois Curfman McInnes } 583785e854fSJed Brown ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr); 584aa05aa95SBarry Smith 585aa05aa95SBarry Smith /* write out local array, by rows */ 586d0f46423SBarry Smith m = mat->rmap->n; 587aa05aa95SBarry Smith v = a->v; 588aa05aa95SBarry Smith for (j=0; j<N; j++) { 589aa05aa95SBarry Smith for (i=0; i<m; i++) { 590578230a0SSatish Balay work[j + i*N] = *v++; 591aa05aa95SBarry Smith } 592aa05aa95SBarry Smith } 593aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 594aa05aa95SBarry Smith /* get largest work array to receive messages from other processes, excludes process zero */ 595aa05aa95SBarry Smith mmax = 0; 596aa05aa95SBarry Smith for (i=1; i<size; i++) { 597d0f46423SBarry Smith mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]); 598aa05aa95SBarry Smith } 599785e854fSJed Brown ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr); 600aa05aa95SBarry Smith for (k = 1; k < size; k++) { 601f8009846SMatthew Knepley v = vv; 602d0f46423SBarry Smith m = mat->rmap->range[k+1] - mat->rmap->range[k]; 603ce94432eSBarry Smith ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 604aa05aa95SBarry Smith 605aa05aa95SBarry Smith for (j = 0; j < N; j++) { 606aa05aa95SBarry Smith for (i = 0; i < m; i++) { 607578230a0SSatish Balay work[j + i*N] = *v++; 608aa05aa95SBarry Smith } 609aa05aa95SBarry Smith } 610aa05aa95SBarry Smith ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 611aa05aa95SBarry Smith } 612aa05aa95SBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 613578230a0SSatish Balay ierr = PetscFree(vv);CHKERRQ(ierr); 614aa05aa95SBarry Smith } else { 615ce94432eSBarry Smith ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 616aa05aa95SBarry Smith } 6176a9046bcSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)"); 618aa05aa95SBarry Smith } 6193a40ed3dSBarry Smith PetscFunctionReturn(0); 6208965ea79SLois Curfman McInnes } 6218965ea79SLois Curfman McInnes 6227da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 6239804daf3SBarry Smith #include <petscdraw.h> 6246849ba73SBarry Smith static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 6258965ea79SLois Curfman McInnes { 62639ddd567SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 627dfbe8321SBarry Smith PetscErrorCode ierr; 6287da1fb6eSBarry Smith PetscMPIInt rank = mdn->rank; 62919fd82e9SBarry Smith PetscViewerType vtype; 630ace3abfcSBarry Smith PetscBool iascii,isdraw; 631b0a32e0cSBarry Smith PetscViewer sviewer; 632f3ef73ceSBarry Smith PetscViewerFormat format; 6338965ea79SLois Curfman McInnes 6343a40ed3dSBarry Smith PetscFunctionBegin; 635251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 636251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 63732077d6dSBarry Smith if (iascii) { 638b0a32e0cSBarry Smith ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 639b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 640456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 6414e220ebcSLois Curfman McInnes MatInfo info; 642888f2ed8SSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 6431575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 6447b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 645b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6461575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 6475d00a290SHong Zhang ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr); 6483a40ed3dSBarry Smith PetscFunctionReturn(0); 649fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 6503a40ed3dSBarry Smith PetscFunctionReturn(0); 6518965ea79SLois Curfman McInnes } 652f1af5d2fSBarry Smith } else if (isdraw) { 653b0a32e0cSBarry Smith PetscDraw draw; 654ace3abfcSBarry Smith PetscBool isnull; 655f1af5d2fSBarry Smith 656b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 657b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 658f1af5d2fSBarry Smith if (isnull) PetscFunctionReturn(0); 659f1af5d2fSBarry Smith } 66077ed5343SBarry Smith 6617da1fb6eSBarry Smith { 6628965ea79SLois Curfman McInnes /* assemble the entire matrix onto first processor. */ 6638965ea79SLois Curfman McInnes Mat A; 664d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 665ba8c8a56SBarry Smith PetscInt *cols; 666ba8c8a56SBarry Smith PetscScalar *vals; 6678965ea79SLois Curfman McInnes 668ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 6698965ea79SLois Curfman McInnes if (!rank) { 670f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 6713a40ed3dSBarry Smith } else { 672f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 6738965ea79SLois Curfman McInnes } 6747adad957SLisandro Dalcin /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 675878740d9SKris Buschelman ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 6760298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 6773bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 6788965ea79SLois Curfman McInnes 67939ddd567SLois Curfman McInnes /* Copy the matrix ... This isn't the most efficient means, 68039ddd567SLois Curfman McInnes but it's quick for now */ 68151022da4SBarry Smith A->insertmode = INSERT_VALUES; 6822205254eSKarl Rupp 6832205254eSKarl Rupp row = mat->rmap->rstart; 6842205254eSKarl Rupp m = mdn->A->rmap->n; 6858965ea79SLois Curfman McInnes for (i=0; i<m; i++) { 686ba8c8a56SBarry Smith ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 687ba8c8a56SBarry Smith ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 688ba8c8a56SBarry Smith ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 68939ddd567SLois Curfman McInnes row++; 6908965ea79SLois Curfman McInnes } 6918965ea79SLois Curfman McInnes 6926d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6936d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6943f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 695b9b97703SBarry Smith if (!rank) { 6961a9d3c3cSBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 6977da1fb6eSBarry Smith ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 6988965ea79SLois Curfman McInnes } 6993f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 700b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 7016bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 7028965ea79SLois Curfman McInnes } 7033a40ed3dSBarry Smith PetscFunctionReturn(0); 7048965ea79SLois Curfman McInnes } 7058965ea79SLois Curfman McInnes 706dfbe8321SBarry Smith PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 7078965ea79SLois Curfman McInnes { 708dfbe8321SBarry Smith PetscErrorCode ierr; 709ace3abfcSBarry Smith PetscBool iascii,isbinary,isdraw,issocket; 7108965ea79SLois Curfman McInnes 711433994e6SBarry Smith PetscFunctionBegin; 712251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 713251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 714251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 715251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 7160f5bd95cSBarry Smith 71732077d6dSBarry Smith if (iascii || issocket || isdraw) { 718f1af5d2fSBarry Smith ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 7190f5bd95cSBarry Smith } else if (isbinary) { 7203a40ed3dSBarry Smith ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr); 72111aeaf0aSBarry Smith } 7223a40ed3dSBarry Smith PetscFunctionReturn(0); 7238965ea79SLois Curfman McInnes } 7248965ea79SLois Curfman McInnes 725dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 7268965ea79SLois Curfman McInnes { 7273501a2bdSLois Curfman McInnes Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 7283501a2bdSLois Curfman McInnes Mat mdn = mat->A; 729dfbe8321SBarry Smith PetscErrorCode ierr; 730329f5518SBarry Smith PetscReal isend[5],irecv[5]; 7318965ea79SLois Curfman McInnes 7323a40ed3dSBarry Smith PetscFunctionBegin; 7334e220ebcSLois Curfman McInnes info->block_size = 1.0; 7342205254eSKarl Rupp 7354e220ebcSLois Curfman McInnes ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 7362205254eSKarl Rupp 7374e220ebcSLois Curfman McInnes isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 7384e220ebcSLois Curfman McInnes isend[3] = info->memory; isend[4] = info->mallocs; 7398965ea79SLois Curfman McInnes if (flag == MAT_LOCAL) { 7404e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 7414e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 7424e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 7434e220ebcSLois Curfman McInnes info->memory = isend[3]; 7444e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 7458965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_MAX) { 746b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7472205254eSKarl Rupp 7484e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7494e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7504e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7514e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7524e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7538965ea79SLois Curfman McInnes } else if (flag == MAT_GLOBAL_SUM) { 754b2566f29SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 7552205254eSKarl Rupp 7564e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 7574e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 7584e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 7594e220ebcSLois Curfman McInnes info->memory = irecv[3]; 7604e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 7618965ea79SLois Curfman McInnes } 7624e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 7634e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 7644e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 7653a40ed3dSBarry Smith PetscFunctionReturn(0); 7668965ea79SLois Curfman McInnes } 7678965ea79SLois Curfman McInnes 768ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 7698965ea79SLois Curfman McInnes { 77039ddd567SLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 771dfbe8321SBarry Smith PetscErrorCode ierr; 7728965ea79SLois Curfman McInnes 7733a40ed3dSBarry Smith PetscFunctionBegin; 77412c028f9SKris Buschelman switch (op) { 775512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 77612c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 77712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 77843674050SBarry Smith MatCheckPreallocated(A,1); 7794e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 78012c028f9SKris Buschelman break; 78112c028f9SKris Buschelman case MAT_ROW_ORIENTED: 78243674050SBarry Smith MatCheckPreallocated(A,1); 7834e0d8c25SBarry Smith a->roworiented = flg; 7844e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 78512c028f9SKris Buschelman break; 7864e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 78713fa8e87SLisandro Dalcin case MAT_KEEP_NONZERO_PATTERN: 78812c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 789290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 79012c028f9SKris Buschelman break; 79112c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 7924e0d8c25SBarry Smith a->donotstash = flg; 79312c028f9SKris Buschelman break; 79477e54ba9SKris Buschelman case MAT_SYMMETRIC: 79577e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 7969a4540c5SBarry Smith case MAT_HERMITIAN: 7979a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 798600fe468SBarry Smith case MAT_IGNORE_LOWER_TRIANGULAR: 799290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 80077e54ba9SKris Buschelman break; 80112c028f9SKris Buschelman default: 802e32f2f54SBarry Smith SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 8033a40ed3dSBarry Smith } 8043a40ed3dSBarry Smith PetscFunctionReturn(0); 8058965ea79SLois Curfman McInnes } 8068965ea79SLois Curfman McInnes 8078965ea79SLois Curfman McInnes 808dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 8095b2fa520SLois Curfman McInnes { 8105b2fa520SLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 8115b2fa520SLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 81287828ca2SBarry Smith PetscScalar *l,*r,x,*v; 813dfbe8321SBarry Smith PetscErrorCode ierr; 814d0f46423SBarry Smith PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n; 8155b2fa520SLois Curfman McInnes 8165b2fa520SLois Curfman McInnes PetscFunctionBegin; 81772d926a5SLois Curfman McInnes ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 8185b2fa520SLois Curfman McInnes if (ll) { 81972d926a5SLois Curfman McInnes ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 820e32f2f54SBarry Smith if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2); 8211ebc52fbSHong Zhang ierr = VecGetArray(ll,&l);CHKERRQ(ierr); 8225b2fa520SLois Curfman McInnes for (i=0; i<m; i++) { 8235b2fa520SLois Curfman McInnes x = l[i]; 8245b2fa520SLois Curfman McInnes v = mat->v + i; 8255b2fa520SLois Curfman McInnes for (j=0; j<n; j++) { (*v) *= x; v+= m;} 8265b2fa520SLois Curfman McInnes } 8271ebc52fbSHong Zhang ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr); 828efee365bSSatish Balay ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 8295b2fa520SLois Curfman McInnes } 8305b2fa520SLois Curfman McInnes if (rr) { 831175be7b4SMatthew Knepley ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 832e32f2f54SBarry Smith if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 833ca9f406cSSatish Balay ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834ca9f406cSSatish Balay ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8351ebc52fbSHong Zhang ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 8365b2fa520SLois Curfman McInnes for (i=0; i<n; i++) { 8375b2fa520SLois Curfman McInnes x = r[i]; 8385b2fa520SLois Curfman McInnes v = mat->v + i*m; 8392205254eSKarl Rupp for (j=0; j<m; j++) (*v++) *= x; 8405b2fa520SLois Curfman McInnes } 8411ebc52fbSHong Zhang ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 842efee365bSSatish Balay ierr = PetscLogFlops(n*m);CHKERRQ(ierr); 8435b2fa520SLois Curfman McInnes } 8445b2fa520SLois Curfman McInnes PetscFunctionReturn(0); 8455b2fa520SLois Curfman McInnes } 8465b2fa520SLois Curfman McInnes 847dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 848096963f5SLois Curfman McInnes { 8493501a2bdSLois Curfman McInnes Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 8503501a2bdSLois Curfman McInnes Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data; 851dfbe8321SBarry Smith PetscErrorCode ierr; 85213f74950SBarry Smith PetscInt i,j; 853329f5518SBarry Smith PetscReal sum = 0.0; 85487828ca2SBarry Smith PetscScalar *v = mat->v; 8553501a2bdSLois Curfman McInnes 8563a40ed3dSBarry Smith PetscFunctionBegin; 8573501a2bdSLois Curfman McInnes if (mdn->size == 1) { 858064f8208SBarry Smith ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 8593501a2bdSLois Curfman McInnes } else { 8603501a2bdSLois Curfman McInnes if (type == NORM_FROBENIUS) { 861d0f46423SBarry Smith for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 862329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 8633501a2bdSLois Curfman McInnes } 864b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 8658f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 866dc0b31edSSatish Balay ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 8673a40ed3dSBarry Smith } else if (type == NORM_1) { 868329f5518SBarry Smith PetscReal *tmp,*tmp2; 869dcca6d9dSJed Brown ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr); 87074ed9c26SBarry Smith ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 87174ed9c26SBarry Smith ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 872064f8208SBarry Smith *nrm = 0.0; 8733501a2bdSLois Curfman McInnes v = mat->v; 874d0f46423SBarry Smith for (j=0; j<mdn->A->cmap->n; j++) { 875d0f46423SBarry Smith for (i=0; i<mdn->A->rmap->n; i++) { 87667e560aaSBarry Smith tmp[j] += PetscAbsScalar(*v); v++; 8773501a2bdSLois Curfman McInnes } 8783501a2bdSLois Curfman McInnes } 879b2566f29SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 880d0f46423SBarry Smith for (j=0; j<A->cmap->N; j++) { 881064f8208SBarry Smith if (tmp2[j] > *nrm) *nrm = tmp2[j]; 8823501a2bdSLois Curfman McInnes } 8838627564fSBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 884d0f46423SBarry Smith ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr); 8853a40ed3dSBarry Smith } else if (type == NORM_INFINITY) { /* max row norm */ 886329f5518SBarry Smith PetscReal ntemp; 8873501a2bdSLois Curfman McInnes ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr); 888b2566f29SBarry Smith ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 889ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 8903501a2bdSLois Curfman McInnes } 8913a40ed3dSBarry Smith PetscFunctionReturn(0); 8923501a2bdSLois Curfman McInnes } 8933501a2bdSLois Curfman McInnes 894fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 8953501a2bdSLois Curfman McInnes { 8963501a2bdSLois Curfman McInnes Mat_MPIDense *a = (Mat_MPIDense*)A->data; 8973501a2bdSLois Curfman McInnes Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data; 8983501a2bdSLois Curfman McInnes Mat B; 899d0f46423SBarry Smith PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 9006849ba73SBarry Smith PetscErrorCode ierr; 90113f74950SBarry Smith PetscInt j,i; 90287828ca2SBarry Smith PetscScalar *v; 9033501a2bdSLois Curfman McInnes 9043a40ed3dSBarry Smith PetscFunctionBegin; 905cf37664fSBarry Smith if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place"); 906cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 907ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 908d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 9097adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 9100298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr); 911fc4dec0aSBarry Smith } else { 912fc4dec0aSBarry Smith B = *matout; 913fc4dec0aSBarry Smith } 9143501a2bdSLois Curfman McInnes 915d0f46423SBarry Smith m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v; 916785e854fSJed Brown ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr); 9173501a2bdSLois Curfman McInnes for (i=0; i<m; i++) rwork[i] = rstart + i; 9181acff37aSSatish Balay for (j=0; j<n; j++) { 9193501a2bdSLois Curfman McInnes ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr); 9203501a2bdSLois Curfman McInnes v += m; 9213501a2bdSLois Curfman McInnes } 922606d414cSSatish Balay ierr = PetscFree(rwork);CHKERRQ(ierr); 9236d4a8577SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 9246d4a8577SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 925cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 9263501a2bdSLois Curfman McInnes *matout = B; 9273501a2bdSLois Curfman McInnes } else { 92828be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 9293501a2bdSLois Curfman McInnes } 9303a40ed3dSBarry Smith PetscFunctionReturn(0); 931096963f5SLois Curfman McInnes } 932096963f5SLois Curfman McInnes 93344cd7ae7SLois Curfman McInnes 9346849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 935d84338a6SBarry Smith extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 9368965ea79SLois Curfman McInnes 9374994cf47SJed Brown PetscErrorCode MatSetUp_MPIDense(Mat A) 938273d9f13SBarry Smith { 939dfbe8321SBarry Smith PetscErrorCode ierr; 940273d9f13SBarry Smith 941273d9f13SBarry Smith PetscFunctionBegin; 942273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr); 943273d9f13SBarry Smith PetscFunctionReturn(0); 944273d9f13SBarry Smith } 945273d9f13SBarry Smith 946488007eeSBarry Smith PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 947488007eeSBarry Smith { 948488007eeSBarry Smith PetscErrorCode ierr; 949488007eeSBarry Smith Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 950488007eeSBarry Smith 951488007eeSBarry Smith PetscFunctionBegin; 952488007eeSBarry Smith ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr); 953a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 954488007eeSBarry Smith PetscFunctionReturn(0); 955488007eeSBarry Smith } 956488007eeSBarry Smith 9577087cfbeSBarry Smith PetscErrorCode MatConjugate_MPIDense(Mat mat) 958ba337c44SJed Brown { 959ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 960ba337c44SJed Brown PetscErrorCode ierr; 961ba337c44SJed Brown 962ba337c44SJed Brown PetscFunctionBegin; 963ba337c44SJed Brown ierr = MatConjugate(a->A);CHKERRQ(ierr); 964ba337c44SJed Brown PetscFunctionReturn(0); 965ba337c44SJed Brown } 966ba337c44SJed Brown 967ba337c44SJed Brown PetscErrorCode MatRealPart_MPIDense(Mat A) 968ba337c44SJed Brown { 969ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 970ba337c44SJed Brown PetscErrorCode ierr; 971ba337c44SJed Brown 972ba337c44SJed Brown PetscFunctionBegin; 973ba337c44SJed Brown ierr = MatRealPart(a->A);CHKERRQ(ierr); 974ba337c44SJed Brown PetscFunctionReturn(0); 975ba337c44SJed Brown } 976ba337c44SJed Brown 977ba337c44SJed Brown PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 978ba337c44SJed Brown { 979ba337c44SJed Brown Mat_MPIDense *a = (Mat_MPIDense*)A->data; 980ba337c44SJed Brown PetscErrorCode ierr; 981ba337c44SJed Brown 982ba337c44SJed Brown PetscFunctionBegin; 983ba337c44SJed Brown ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 984ba337c44SJed Brown PetscFunctionReturn(0); 985ba337c44SJed Brown } 986ba337c44SJed Brown 9870716a85fSBarry Smith extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*); 9880716a85fSBarry Smith PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms) 9890716a85fSBarry Smith { 9900716a85fSBarry Smith PetscErrorCode ierr; 9910716a85fSBarry Smith PetscInt i,n; 9920716a85fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*) A->data; 9930716a85fSBarry Smith PetscReal *work; 9940716a85fSBarry Smith 9950716a85fSBarry Smith PetscFunctionBegin; 9960298fd71SBarry Smith ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 997785e854fSJed Brown ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 9980716a85fSBarry Smith ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr); 9990716a85fSBarry Smith if (type == NORM_2) { 10000716a85fSBarry Smith for (i=0; i<n; i++) work[i] *= work[i]; 10010716a85fSBarry Smith } 10020716a85fSBarry Smith if (type == NORM_INFINITY) { 1003b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 10040716a85fSBarry Smith } else { 1005b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 10060716a85fSBarry Smith } 10070716a85fSBarry Smith ierr = PetscFree(work);CHKERRQ(ierr); 10080716a85fSBarry Smith if (type == NORM_2) { 10098f1a2a5eSBarry Smith for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 10100716a85fSBarry Smith } 10110716a85fSBarry Smith PetscFunctionReturn(0); 10120716a85fSBarry Smith } 10130716a85fSBarry Smith 101473a71a0fSBarry Smith static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 101573a71a0fSBarry Smith { 101673a71a0fSBarry Smith Mat_MPIDense *d = (Mat_MPIDense*)x->data; 101773a71a0fSBarry Smith PetscErrorCode ierr; 101873a71a0fSBarry Smith PetscScalar *a; 101973a71a0fSBarry Smith PetscInt m,n,i; 102073a71a0fSBarry Smith 102173a71a0fSBarry Smith PetscFunctionBegin; 102273a71a0fSBarry Smith ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr); 10238c778c55SBarry Smith ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr); 102473a71a0fSBarry Smith for (i=0; i<m*n; i++) { 102573a71a0fSBarry Smith ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr); 102673a71a0fSBarry Smith } 10278c778c55SBarry Smith ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr); 102873a71a0fSBarry Smith PetscFunctionReturn(0); 102973a71a0fSBarry Smith } 103073a71a0fSBarry Smith 1031fd4e9aacSBarry Smith extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat); 1032fd4e9aacSBarry Smith 10333b49f96aSBarry Smith static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 10343b49f96aSBarry Smith { 10353b49f96aSBarry Smith PetscFunctionBegin; 10363b49f96aSBarry Smith *missing = PETSC_FALSE; 10373b49f96aSBarry Smith PetscFunctionReturn(0); 10383b49f96aSBarry Smith } 10393b49f96aSBarry Smith 10408965ea79SLois Curfman McInnes /* -------------------------------------------------------------------*/ 104109dc0095SBarry Smith static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 104209dc0095SBarry Smith MatGetRow_MPIDense, 104309dc0095SBarry Smith MatRestoreRow_MPIDense, 104409dc0095SBarry Smith MatMult_MPIDense, 104597304618SKris Buschelman /* 4*/ MatMultAdd_MPIDense, 10467c922b88SBarry Smith MatMultTranspose_MPIDense, 10477c922b88SBarry Smith MatMultTransposeAdd_MPIDense, 10488965ea79SLois Curfman McInnes 0, 104909dc0095SBarry Smith 0, 105009dc0095SBarry Smith 0, 105197304618SKris Buschelman /* 10*/ 0, 105209dc0095SBarry Smith 0, 105309dc0095SBarry Smith 0, 105409dc0095SBarry Smith 0, 105509dc0095SBarry Smith MatTranspose_MPIDense, 105697304618SKris Buschelman /* 15*/ MatGetInfo_MPIDense, 10576e4ee0c6SHong Zhang MatEqual_MPIDense, 105809dc0095SBarry Smith MatGetDiagonal_MPIDense, 10595b2fa520SLois Curfman McInnes MatDiagonalScale_MPIDense, 106009dc0095SBarry Smith MatNorm_MPIDense, 106197304618SKris Buschelman /* 20*/ MatAssemblyBegin_MPIDense, 106209dc0095SBarry Smith MatAssemblyEnd_MPIDense, 106309dc0095SBarry Smith MatSetOption_MPIDense, 106409dc0095SBarry Smith MatZeroEntries_MPIDense, 1065d519adbfSMatthew Knepley /* 24*/ MatZeroRows_MPIDense, 1066919b68f7SBarry Smith 0, 106701b82886SBarry Smith 0, 106801b82886SBarry Smith 0, 106901b82886SBarry Smith 0, 10704994cf47SJed Brown /* 29*/ MatSetUp_MPIDense, 1071273d9f13SBarry Smith 0, 107209dc0095SBarry Smith 0, 1073c56a70eeSBarry Smith MatGetDiagonalBlock_MPIDense, 10748c778c55SBarry Smith 0, 1075d519adbfSMatthew Knepley /* 34*/ MatDuplicate_MPIDense, 107609dc0095SBarry Smith 0, 107709dc0095SBarry Smith 0, 107809dc0095SBarry Smith 0, 107909dc0095SBarry Smith 0, 1080d519adbfSMatthew Knepley /* 39*/ MatAXPY_MPIDense, 1081*7dae84e0SHong Zhang MatCreateSubMatrices_MPIDense, 108209dc0095SBarry Smith 0, 108309dc0095SBarry Smith MatGetValues_MPIDense, 108409dc0095SBarry Smith 0, 1085d519adbfSMatthew Knepley /* 44*/ 0, 108609dc0095SBarry Smith MatScale_MPIDense, 10877d68702bSBarry Smith MatShift_Basic, 108809dc0095SBarry Smith 0, 108909dc0095SBarry Smith 0, 109073a71a0fSBarry Smith /* 49*/ MatSetRandom_MPIDense, 109109dc0095SBarry Smith 0, 109209dc0095SBarry Smith 0, 109309dc0095SBarry Smith 0, 109409dc0095SBarry Smith 0, 1095d519adbfSMatthew Knepley /* 54*/ 0, 109609dc0095SBarry Smith 0, 109709dc0095SBarry Smith 0, 109809dc0095SBarry Smith 0, 109909dc0095SBarry Smith 0, 1100*7dae84e0SHong Zhang /* 59*/ MatCreateSubMatrix_MPIDense, 1101b9b97703SBarry Smith MatDestroy_MPIDense, 1102b9b97703SBarry Smith MatView_MPIDense, 1103357abbc8SBarry Smith 0, 110497304618SKris Buschelman 0, 1105d519adbfSMatthew Knepley /* 64*/ 0, 110697304618SKris Buschelman 0, 110797304618SKris Buschelman 0, 110897304618SKris Buschelman 0, 110997304618SKris Buschelman 0, 1110d519adbfSMatthew Knepley /* 69*/ 0, 111197304618SKris Buschelman 0, 111297304618SKris Buschelman 0, 111397304618SKris Buschelman 0, 111497304618SKris Buschelman 0, 1115d519adbfSMatthew Knepley /* 74*/ 0, 111697304618SKris Buschelman 0, 111797304618SKris Buschelman 0, 111897304618SKris Buschelman 0, 111997304618SKris Buschelman 0, 1120d519adbfSMatthew Knepley /* 79*/ 0, 112197304618SKris Buschelman 0, 112297304618SKris Buschelman 0, 112397304618SKris Buschelman 0, 11245bba2384SShri Abhyankar /* 83*/ MatLoad_MPIDense, 1125865e5f61SKris Buschelman 0, 1126865e5f61SKris Buschelman 0, 1127865e5f61SKris Buschelman 0, 1128865e5f61SKris Buschelman 0, 1129865e5f61SKris Buschelman 0, 11309775878aSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1131320f2790SHong Zhang /* 89*/ MatMatMult_MPIDense_MPIDense, 1132320f2790SHong Zhang MatMatMultSymbolic_MPIDense_MPIDense, 11339775878aSHong Zhang #else 11349775878aSHong Zhang /* 89*/ 0, 1135865e5f61SKris Buschelman 0, 11369775878aSHong Zhang #endif 1137fd4e9aacSBarry Smith MatMatMultNumeric_MPIDense, 11382fbe02b9SBarry Smith 0, 1139ba337c44SJed Brown 0, 1140d519adbfSMatthew Knepley /* 94*/ 0, 1141865e5f61SKris Buschelman 0, 1142865e5f61SKris Buschelman 0, 1143ba337c44SJed Brown 0, 1144ba337c44SJed Brown 0, 1145ba337c44SJed Brown /* 99*/ 0, 1146ba337c44SJed Brown 0, 1147ba337c44SJed Brown 0, 1148ba337c44SJed Brown MatConjugate_MPIDense, 1149ba337c44SJed Brown 0, 1150ba337c44SJed Brown /*104*/ 0, 1151ba337c44SJed Brown MatRealPart_MPIDense, 1152ba337c44SJed Brown MatImaginaryPart_MPIDense, 115386d161a7SShri Abhyankar 0, 115486d161a7SShri Abhyankar 0, 115586d161a7SShri Abhyankar /*109*/ 0, 115686d161a7SShri Abhyankar 0, 115786d161a7SShri Abhyankar 0, 115886d161a7SShri Abhyankar 0, 11593b49f96aSBarry Smith MatMissingDiagonal_MPIDense, 116086d161a7SShri Abhyankar /*114*/ 0, 116186d161a7SShri Abhyankar 0, 116286d161a7SShri Abhyankar 0, 116386d161a7SShri Abhyankar 0, 116486d161a7SShri Abhyankar 0, 116586d161a7SShri Abhyankar /*119*/ 0, 116686d161a7SShri Abhyankar 0, 116786d161a7SShri Abhyankar 0, 11680716a85fSBarry Smith 0, 11690716a85fSBarry Smith 0, 11700716a85fSBarry Smith /*124*/ 0, 11713964eb88SJed Brown MatGetColumnNorms_MPIDense, 11723964eb88SJed Brown 0, 11733964eb88SJed Brown 0, 11743964eb88SJed Brown 0, 11753964eb88SJed Brown /*129*/ 0, 1176cb20be35SHong Zhang MatTransposeMatMult_MPIDense_MPIDense, 1177cb20be35SHong Zhang MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1178cb20be35SHong Zhang MatTransposeMatMultNumeric_MPIDense_MPIDense, 11793964eb88SJed Brown 0, 11803964eb88SJed Brown /*134*/ 0, 11813964eb88SJed Brown 0, 11823964eb88SJed Brown 0, 11833964eb88SJed Brown 0, 11843964eb88SJed Brown 0, 11853964eb88SJed Brown /*139*/ 0, 1186f9426fe0SMark Adams 0, 11873964eb88SJed Brown 0 1188ba337c44SJed Brown }; 11898965ea79SLois Curfman McInnes 11907087cfbeSBarry Smith PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1191a23d5eceSKris Buschelman { 1192a23d5eceSKris Buschelman Mat_MPIDense *a; 1193dfbe8321SBarry Smith PetscErrorCode ierr; 1194a23d5eceSKris Buschelman 1195a23d5eceSKris Buschelman PetscFunctionBegin; 1196a23d5eceSKris Buschelman mat->preallocated = PETSC_TRUE; 1197a23d5eceSKris Buschelman /* Note: For now, when data is specified above, this assumes the user correctly 1198a23d5eceSKris Buschelman allocates the local dense storage space. We should add error checking. */ 1199a23d5eceSKris Buschelman 1200a23d5eceSKris Buschelman a = (Mat_MPIDense*)mat->data; 120134ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 120234ef9618SShri Abhyankar ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 120334ef9618SShri Abhyankar a->nvec = mat->cmap->n; 120434ef9618SShri Abhyankar 1205f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr); 1206d0f46423SBarry Smith ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr); 12075c5985e7SKris Buschelman ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr); 12085c5985e7SKris Buschelman ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr); 12093bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 1210a23d5eceSKris Buschelman PetscFunctionReturn(0); 1211a23d5eceSKris Buschelman } 1212a23d5eceSKris Buschelman 121365b80a83SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1214cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 12158baccfbdSHong Zhang { 12168ea901baSHong Zhang Mat mat_elemental; 12178ea901baSHong Zhang PetscErrorCode ierr; 121832d7a744SHong Zhang PetscScalar *v; 121932d7a744SHong Zhang PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 12208ea901baSHong Zhang 12218baccfbdSHong Zhang PetscFunctionBegin; 1222378336b6SHong Zhang if (reuse == MAT_REUSE_MATRIX) { 1223378336b6SHong Zhang mat_elemental = *newmat; 1224378336b6SHong Zhang ierr = MatZeroEntries(*newmat);CHKERRQ(ierr); 1225378336b6SHong Zhang } else { 1226378336b6SHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr); 1227378336b6SHong Zhang ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 1228378336b6SHong Zhang ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr); 1229378336b6SHong Zhang ierr = MatSetUp(mat_elemental);CHKERRQ(ierr); 123032d7a744SHong Zhang ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1231378336b6SHong Zhang } 1232378336b6SHong Zhang 123332d7a744SHong Zhang ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr); 123432d7a744SHong Zhang for (i=0; i<N; i++) cols[i] = i; 123532d7a744SHong Zhang for (i=0; i<m; i++) rows[i] = rstart + i; 12368ea901baSHong Zhang 12378ea901baSHong Zhang /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 123832d7a744SHong Zhang ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr); 123932d7a744SHong Zhang ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr); 12408ea901baSHong Zhang ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 12418ea901baSHong Zhang ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124232d7a744SHong Zhang ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr); 124332d7a744SHong Zhang ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 12448ea901baSHong Zhang 1245511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 124628be2f97SBarry Smith ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr); 12478ea901baSHong Zhang } else { 12488ea901baSHong Zhang *newmat = mat_elemental; 12498ea901baSHong Zhang } 12508baccfbdSHong Zhang PetscFunctionReturn(0); 12518baccfbdSHong Zhang } 125265b80a83SHong Zhang #endif 12538baccfbdSHong Zhang 12548cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1255273d9f13SBarry Smith { 1256273d9f13SBarry Smith Mat_MPIDense *a; 1257dfbe8321SBarry Smith PetscErrorCode ierr; 1258273d9f13SBarry Smith 1259273d9f13SBarry Smith PetscFunctionBegin; 1260b00a9115SJed Brown ierr = PetscNewLog(mat,&a);CHKERRQ(ierr); 1261b0a32e0cSBarry Smith mat->data = (void*)a; 1262273d9f13SBarry Smith ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1263273d9f13SBarry Smith 1264273d9f13SBarry Smith mat->insertmode = NOT_SET_VALUES; 1265ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr); 1266ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr); 1267273d9f13SBarry Smith 1268273d9f13SBarry Smith /* build cache for off array entries formed */ 1269273d9f13SBarry Smith a->donotstash = PETSC_FALSE; 12702205254eSKarl Rupp 1271ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr); 1272273d9f13SBarry Smith 1273273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 1274273d9f13SBarry Smith a->lvec = 0; 1275273d9f13SBarry Smith a->Mvctx = 0; 1276273d9f13SBarry Smith a->roworiented = PETSC_TRUE; 1277273d9f13SBarry Smith 1278bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr); 1279bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr); 12808baccfbdSHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 12818baccfbdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr); 12828baccfbdSHong Zhang #endif 1283bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr); 1284bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 1285bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 1286bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 12878949adfdSHong Zhang 12888949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 12898949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr); 12908949adfdSHong Zhang ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr); 129138aed534SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr); 1292273d9f13SBarry Smith PetscFunctionReturn(0); 1293273d9f13SBarry Smith } 1294273d9f13SBarry Smith 1295209238afSKris Buschelman /*MC 1296002d173eSKris Buschelman MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1297209238afSKris Buschelman 1298209238afSKris Buschelman This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1299209238afSKris Buschelman and MATMPIDENSE otherwise. 1300209238afSKris Buschelman 1301209238afSKris Buschelman Options Database Keys: 1302209238afSKris Buschelman . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1303209238afSKris Buschelman 1304209238afSKris Buschelman Level: beginner 1305209238afSKris Buschelman 130601b82886SBarry Smith 1307209238afSKris Buschelman .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE 1308209238afSKris Buschelman M*/ 1309209238afSKris Buschelman 1310273d9f13SBarry Smith /*@C 1311273d9f13SBarry Smith MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1312273d9f13SBarry Smith 1313273d9f13SBarry Smith Not collective 1314273d9f13SBarry Smith 1315273d9f13SBarry Smith Input Parameters: 13161c4f3114SJed Brown . B - the matrix 13170298fd71SBarry Smith - data - optional location of matrix data. Set data=NULL for PETSc 1318273d9f13SBarry Smith to control all matrix memory allocation. 1319273d9f13SBarry Smith 1320273d9f13SBarry Smith Notes: 1321273d9f13SBarry Smith The dense format is fully compatible with standard Fortran 77 1322273d9f13SBarry Smith storage by columns. 1323273d9f13SBarry Smith 1324273d9f13SBarry Smith The data input variable is intended primarily for Fortran programmers 1325273d9f13SBarry Smith who wish to allocate their own matrix memory space. Most users should 13260298fd71SBarry Smith set data=NULL. 1327273d9f13SBarry Smith 1328273d9f13SBarry Smith Level: intermediate 1329273d9f13SBarry Smith 1330273d9f13SBarry Smith .keywords: matrix,dense, parallel 1331273d9f13SBarry Smith 1332273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1333273d9f13SBarry Smith @*/ 13341c4f3114SJed Brown PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1335273d9f13SBarry Smith { 13364ac538c5SBarry Smith PetscErrorCode ierr; 1337273d9f13SBarry Smith 1338273d9f13SBarry Smith PetscFunctionBegin; 13391c4f3114SJed Brown ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr); 1340273d9f13SBarry Smith PetscFunctionReturn(0); 1341273d9f13SBarry Smith } 1342273d9f13SBarry Smith 13438965ea79SLois Curfman McInnes /*@C 134469b1f4b7SBarry Smith MatCreateDense - Creates a parallel matrix in dense format. 13458965ea79SLois Curfman McInnes 1346db81eaa0SLois Curfman McInnes Collective on MPI_Comm 1347db81eaa0SLois Curfman McInnes 13488965ea79SLois Curfman McInnes Input Parameters: 1349db81eaa0SLois Curfman McInnes + comm - MPI communicator 13508965ea79SLois Curfman McInnes . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1351db81eaa0SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 13528965ea79SLois Curfman McInnes . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1353db81eaa0SLois Curfman McInnes . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 13546cfe35ddSJose E. Roman - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 1355dfc5480cSLois Curfman McInnes to control all matrix memory allocation. 13568965ea79SLois Curfman McInnes 13578965ea79SLois Curfman McInnes Output Parameter: 1358477f1c0bSLois Curfman McInnes . A - the matrix 13598965ea79SLois Curfman McInnes 1360b259b22eSLois Curfman McInnes Notes: 136139ddd567SLois Curfman McInnes The dense format is fully compatible with standard Fortran 77 136239ddd567SLois Curfman McInnes storage by columns. 13638965ea79SLois Curfman McInnes 136418f449edSLois Curfman McInnes The data input variable is intended primarily for Fortran programmers 136518f449edSLois Curfman McInnes who wish to allocate their own matrix memory space. Most users should 13666cfe35ddSJose E. Roman set data=NULL (PETSC_NULL_SCALAR for Fortran users). 136718f449edSLois Curfman McInnes 13688965ea79SLois Curfman McInnes The user MUST specify either the local or global matrix dimensions 13698965ea79SLois Curfman McInnes (possibly both). 13708965ea79SLois Curfman McInnes 1371027ccd11SLois Curfman McInnes Level: intermediate 1372027ccd11SLois Curfman McInnes 137339ddd567SLois Curfman McInnes .keywords: matrix,dense, parallel 13748965ea79SLois Curfman McInnes 137539ddd567SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 13768965ea79SLois Curfman McInnes @*/ 137769b1f4b7SBarry Smith PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 13788965ea79SLois Curfman McInnes { 13796849ba73SBarry Smith PetscErrorCode ierr; 138013f74950SBarry Smith PetscMPIInt size; 13818965ea79SLois Curfman McInnes 13823a40ed3dSBarry Smith PetscFunctionBegin; 1383f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 1384f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 1385273d9f13SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1386273d9f13SBarry Smith if (size > 1) { 1387273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 1388273d9f13SBarry Smith ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 13896cfe35ddSJose E. Roman if (data) { /* user provided data array, so no need to assemble */ 13906cfe35ddSJose E. Roman ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 13916cfe35ddSJose E. Roman (*A)->assembled = PETSC_TRUE; 13926cfe35ddSJose E. Roman } 1393273d9f13SBarry Smith } else { 1394273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 1395273d9f13SBarry Smith ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 13968c469469SLois Curfman McInnes } 13973a40ed3dSBarry Smith PetscFunctionReturn(0); 13988965ea79SLois Curfman McInnes } 13998965ea79SLois Curfman McInnes 14006849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 14018965ea79SLois Curfman McInnes { 14028965ea79SLois Curfman McInnes Mat mat; 14033501a2bdSLois Curfman McInnes Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 1404dfbe8321SBarry Smith PetscErrorCode ierr; 14058965ea79SLois Curfman McInnes 14063a40ed3dSBarry Smith PetscFunctionBegin; 14078965ea79SLois Curfman McInnes *newmat = 0; 1408ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 1409d0f46423SBarry Smith ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 14107adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr); 1411834f8fabSBarry Smith a = (Mat_MPIDense*)mat->data; 1412e04c1aa4SHong Zhang ierr = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 14135aa7edbeSHong Zhang 1414d5f3da31SBarry Smith mat->factortype = A->factortype; 1415c456f294SBarry Smith mat->assembled = PETSC_TRUE; 1416273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 14178965ea79SLois Curfman McInnes 14188965ea79SLois Curfman McInnes a->size = oldmat->size; 14198965ea79SLois Curfman McInnes a->rank = oldmat->rank; 1420e0fa3b82SLois Curfman McInnes mat->insertmode = NOT_SET_VALUES; 1421b9b97703SBarry Smith a->nvec = oldmat->nvec; 14223782ba37SSatish Balay a->donotstash = oldmat->donotstash; 1423e04c1aa4SHong Zhang 14241e1e43feSBarry Smith ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr); 14251e1e43feSBarry Smith ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr); 14268965ea79SLois Curfman McInnes 1427329f5518SBarry Smith ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 14285609ef8eSBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 14293bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 143001b82886SBarry Smith 14318965ea79SLois Curfman McInnes *newmat = mat; 14323a40ed3dSBarry Smith PetscFunctionReturn(0); 14338965ea79SLois Curfman McInnes } 14348965ea79SLois Curfman McInnes 14359d36ed5fSBarry Smith PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat) 143686d161a7SShri Abhyankar { 143786d161a7SShri Abhyankar PetscErrorCode ierr; 143886d161a7SShri Abhyankar PetscMPIInt rank,size; 143974c13d95SBarry Smith const PetscInt *rowners; 144074c13d95SBarry Smith PetscInt i,m,n,nz,j,mMax; 144186d161a7SShri Abhyankar PetscScalar *array,*vals,*vals_ptr; 14429d36ed5fSBarry Smith Mat_MPIDense *a = (Mat_MPIDense*)newmat->data; 144386d161a7SShri Abhyankar 144486d161a7SShri Abhyankar PetscFunctionBegin; 144586d161a7SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 144686d161a7SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 144786d161a7SShri Abhyankar 144874c13d95SBarry Smith /* determine ownership of rows and columns */ 144974c13d95SBarry Smith m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n; 145074c13d95SBarry Smith n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n; 145186d161a7SShri Abhyankar 145274c13d95SBarry Smith ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 14539d36ed5fSBarry Smith if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 14540298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 14559d36ed5fSBarry Smith } 14568c778c55SBarry Smith ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr); 145749467e7dSBarry Smith ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr); 145874c13d95SBarry Smith ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr); 1459396e826eSBarry Smith ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 146086d161a7SShri Abhyankar if (!rank) { 14619d36ed5fSBarry Smith ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr); 146286d161a7SShri Abhyankar 146386d161a7SShri Abhyankar /* read in my part of the matrix numerical values */ 146486d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr); 146586d161a7SShri Abhyankar 146686d161a7SShri Abhyankar /* insert into matrix-by row (this is why cannot directly read into array */ 146786d161a7SShri Abhyankar vals_ptr = vals; 146886d161a7SShri Abhyankar for (i=0; i<m; i++) { 146986d161a7SShri Abhyankar for (j=0; j<N; j++) { 147086d161a7SShri Abhyankar array[i + j*m] = *vals_ptr++; 147186d161a7SShri Abhyankar } 147286d161a7SShri Abhyankar } 147386d161a7SShri Abhyankar 147486d161a7SShri Abhyankar /* read in other processors and ship out */ 147586d161a7SShri Abhyankar for (i=1; i<size; i++) { 147686d161a7SShri Abhyankar nz = (rowners[i+1] - rowners[i])*N; 147786d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1478a25532f0SBarry Smith ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 147986d161a7SShri Abhyankar } 148086d161a7SShri Abhyankar } else { 148186d161a7SShri Abhyankar /* receive numeric values */ 1482785e854fSJed Brown ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr); 148386d161a7SShri Abhyankar 148486d161a7SShri Abhyankar /* receive message of values*/ 1485a25532f0SBarry Smith ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr); 148686d161a7SShri Abhyankar 148786d161a7SShri Abhyankar /* insert into matrix-by row (this is why cannot directly read into array */ 148886d161a7SShri Abhyankar vals_ptr = vals; 148986d161a7SShri Abhyankar for (i=0; i<m; i++) { 149086d161a7SShri Abhyankar for (j=0; j<N; j++) { 149186d161a7SShri Abhyankar array[i + j*m] = *vals_ptr++; 149286d161a7SShri Abhyankar } 149386d161a7SShri Abhyankar } 149486d161a7SShri Abhyankar } 14958c778c55SBarry Smith ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr); 149686d161a7SShri Abhyankar ierr = PetscFree(vals);CHKERRQ(ierr); 149786d161a7SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 149886d161a7SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 149986d161a7SShri Abhyankar PetscFunctionReturn(0); 150086d161a7SShri Abhyankar } 150186d161a7SShri Abhyankar 1502112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer) 150386d161a7SShri Abhyankar { 1504dfdea288SBarry Smith Mat_MPIDense *a; 150586d161a7SShri Abhyankar PetscScalar *vals,*svals; 1506ce94432eSBarry Smith MPI_Comm comm; 150786d161a7SShri Abhyankar MPI_Status status; 150849467e7dSBarry Smith PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz; 150986d161a7SShri Abhyankar PetscInt header[4],*rowlengths = 0,M,N,*cols; 151049467e7dSBarry Smith PetscInt *ourlens,*procsnz = 0,jj,*mycols,*smycols; 15119d36ed5fSBarry Smith PetscInt i,nz,j,rstart,rend; 151286d161a7SShri Abhyankar int fd; 151386d161a7SShri Abhyankar PetscErrorCode ierr; 151486d161a7SShri Abhyankar 151586d161a7SShri Abhyankar PetscFunctionBegin; 1516c98fd787SBarry Smith /* force binary viewer to load .info file if it has not yet done so */ 1517c98fd787SBarry Smith ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1518ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 151986d161a7SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 152086d161a7SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 152186d161a7SShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 15225872f025SBarry Smith if (!rank) { 152386d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 152486d161a7SShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 152586d161a7SShri Abhyankar } 152686d161a7SShri Abhyankar ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 152786d161a7SShri Abhyankar M = header[1]; N = header[2]; nz = header[3]; 152886d161a7SShri Abhyankar 152986d161a7SShri Abhyankar /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 15309d36ed5fSBarry Smith if (newmat->rmap->N < 0) newmat->rmap->N = M; 15319d36ed5fSBarry Smith if (newmat->cmap->N < 0) newmat->cmap->N = N; 153286d161a7SShri Abhyankar 15339d36ed5fSBarry Smith if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N); 15349d36ed5fSBarry Smith if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N); 153586d161a7SShri Abhyankar 153686d161a7SShri Abhyankar /* 153786d161a7SShri Abhyankar Handle case where matrix is stored on disk as a dense matrix 153886d161a7SShri Abhyankar */ 153986d161a7SShri Abhyankar if (nz == MATRIX_BINARY_FORMAT_DENSE) { 15409d36ed5fSBarry Smith ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr); 154186d161a7SShri Abhyankar PetscFunctionReturn(0); 154286d161a7SShri Abhyankar } 154386d161a7SShri Abhyankar 154486d161a7SShri Abhyankar /* determine ownership of all rows */ 15452205254eSKarl Rupp if (newmat->rmap->n < 0) { 15462205254eSKarl Rupp ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr); 15472205254eSKarl Rupp } else { 15482205254eSKarl Rupp ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr); 15492205254eSKarl Rupp } 155049467e7dSBarry Smith if (newmat->cmap->n < 0) { 155149467e7dSBarry Smith n = PETSC_DECIDE; 155249467e7dSBarry Smith } else { 155349467e7dSBarry Smith ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr); 155449467e7dSBarry Smith } 155549467e7dSBarry Smith 1556854ce69bSBarry Smith ierr = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr); 155786d161a7SShri Abhyankar ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 155886d161a7SShri Abhyankar rowners[0] = 0; 155986d161a7SShri Abhyankar for (i=2; i<=size; i++) { 156086d161a7SShri Abhyankar rowners[i] += rowners[i-1]; 156186d161a7SShri Abhyankar } 156286d161a7SShri Abhyankar rstart = rowners[rank]; 156386d161a7SShri Abhyankar rend = rowners[rank+1]; 156486d161a7SShri Abhyankar 156586d161a7SShri Abhyankar /* distribute row lengths to all processors */ 156649467e7dSBarry Smith ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr); 156786d161a7SShri Abhyankar if (!rank) { 1568785e854fSJed Brown ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr); 156986d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1570785e854fSJed Brown ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr); 157186d161a7SShri Abhyankar for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 157286d161a7SShri Abhyankar ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 157386d161a7SShri Abhyankar ierr = PetscFree(sndcounts);CHKERRQ(ierr); 157486d161a7SShri Abhyankar } else { 157586d161a7SShri Abhyankar ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 157686d161a7SShri Abhyankar } 157786d161a7SShri Abhyankar 157886d161a7SShri Abhyankar if (!rank) { 157986d161a7SShri Abhyankar /* calculate the number of nonzeros on each processor */ 1580785e854fSJed Brown ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr); 158186d161a7SShri Abhyankar ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 158286d161a7SShri Abhyankar for (i=0; i<size; i++) { 158386d161a7SShri Abhyankar for (j=rowners[i]; j< rowners[i+1]; j++) { 158486d161a7SShri Abhyankar procsnz[i] += rowlengths[j]; 158586d161a7SShri Abhyankar } 158686d161a7SShri Abhyankar } 158786d161a7SShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 158886d161a7SShri Abhyankar 158986d161a7SShri Abhyankar /* determine max buffer needed and allocate it */ 159086d161a7SShri Abhyankar maxnz = 0; 159186d161a7SShri Abhyankar for (i=0; i<size; i++) { 159286d161a7SShri Abhyankar maxnz = PetscMax(maxnz,procsnz[i]); 159386d161a7SShri Abhyankar } 1594785e854fSJed Brown ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 159586d161a7SShri Abhyankar 159686d161a7SShri Abhyankar /* read in my part of the matrix column indices */ 159786d161a7SShri Abhyankar nz = procsnz[0]; 1598785e854fSJed Brown ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 159986d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 160086d161a7SShri Abhyankar 160186d161a7SShri Abhyankar /* read in every one elses and ship off */ 160286d161a7SShri Abhyankar for (i=1; i<size; i++) { 160386d161a7SShri Abhyankar nz = procsnz[i]; 160486d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 160586d161a7SShri Abhyankar ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 160686d161a7SShri Abhyankar } 160786d161a7SShri Abhyankar ierr = PetscFree(cols);CHKERRQ(ierr); 160886d161a7SShri Abhyankar } else { 160986d161a7SShri Abhyankar /* determine buffer space needed for message */ 161086d161a7SShri Abhyankar nz = 0; 161186d161a7SShri Abhyankar for (i=0; i<m; i++) { 161286d161a7SShri Abhyankar nz += ourlens[i]; 161386d161a7SShri Abhyankar } 1614854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr); 161586d161a7SShri Abhyankar 161686d161a7SShri Abhyankar /* receive message of column indices*/ 161786d161a7SShri Abhyankar ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 161886d161a7SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 161986d161a7SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 162086d161a7SShri Abhyankar } 162186d161a7SShri Abhyankar 162249467e7dSBarry Smith ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr); 1623dfdea288SBarry Smith a = (Mat_MPIDense*)newmat->data; 16242e3566b0SBarry Smith if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) { 16250298fd71SBarry Smith ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 1626dfdea288SBarry Smith } 162786d161a7SShri Abhyankar 162886d161a7SShri Abhyankar if (!rank) { 1629785e854fSJed Brown ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr); 163086d161a7SShri Abhyankar 163186d161a7SShri Abhyankar /* read in my part of the matrix numerical values */ 163286d161a7SShri Abhyankar nz = procsnz[0]; 163386d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 163486d161a7SShri Abhyankar 163586d161a7SShri Abhyankar /* insert into matrix */ 163686d161a7SShri Abhyankar jj = rstart; 163786d161a7SShri Abhyankar smycols = mycols; 163886d161a7SShri Abhyankar svals = vals; 163986d161a7SShri Abhyankar for (i=0; i<m; i++) { 164086d161a7SShri Abhyankar ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 164186d161a7SShri Abhyankar smycols += ourlens[i]; 164286d161a7SShri Abhyankar svals += ourlens[i]; 164386d161a7SShri Abhyankar jj++; 164486d161a7SShri Abhyankar } 164586d161a7SShri Abhyankar 164686d161a7SShri Abhyankar /* read in other processors and ship out */ 164786d161a7SShri Abhyankar for (i=1; i<size; i++) { 164886d161a7SShri Abhyankar nz = procsnz[i]; 164986d161a7SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 165086d161a7SShri Abhyankar ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 165186d161a7SShri Abhyankar } 165286d161a7SShri Abhyankar ierr = PetscFree(procsnz);CHKERRQ(ierr); 165386d161a7SShri Abhyankar } else { 165486d161a7SShri Abhyankar /* receive numeric values */ 1655854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 165686d161a7SShri Abhyankar 165786d161a7SShri Abhyankar /* receive message of values*/ 165886d161a7SShri Abhyankar ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr); 165986d161a7SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 166086d161a7SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 166186d161a7SShri Abhyankar 166286d161a7SShri Abhyankar /* insert into matrix */ 166386d161a7SShri Abhyankar jj = rstart; 166486d161a7SShri Abhyankar smycols = mycols; 166586d161a7SShri Abhyankar svals = vals; 166686d161a7SShri Abhyankar for (i=0; i<m; i++) { 166786d161a7SShri Abhyankar ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 166886d161a7SShri Abhyankar smycols += ourlens[i]; 166986d161a7SShri Abhyankar svals += ourlens[i]; 167086d161a7SShri Abhyankar jj++; 167186d161a7SShri Abhyankar } 167286d161a7SShri Abhyankar } 167349467e7dSBarry Smith ierr = PetscFree(ourlens);CHKERRQ(ierr); 167486d161a7SShri Abhyankar ierr = PetscFree(vals);CHKERRQ(ierr); 167586d161a7SShri Abhyankar ierr = PetscFree(mycols);CHKERRQ(ierr); 167686d161a7SShri Abhyankar ierr = PetscFree(rowners);CHKERRQ(ierr); 167786d161a7SShri Abhyankar 167886d161a7SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167986d161a7SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168086d161a7SShri Abhyankar PetscFunctionReturn(0); 168186d161a7SShri Abhyankar } 168286d161a7SShri Abhyankar 1683ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 16846e4ee0c6SHong Zhang { 16856e4ee0c6SHong Zhang Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 16866e4ee0c6SHong Zhang Mat a,b; 1687ace3abfcSBarry Smith PetscBool flg; 16886e4ee0c6SHong Zhang PetscErrorCode ierr; 168990ace30eSBarry Smith 16906e4ee0c6SHong Zhang PetscFunctionBegin; 16916e4ee0c6SHong Zhang a = matA->A; 16926e4ee0c6SHong Zhang b = matB->A; 16936e4ee0c6SHong Zhang ierr = MatEqual(a,b,&flg);CHKERRQ(ierr); 1694b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 16956e4ee0c6SHong Zhang PetscFunctionReturn(0); 16966e4ee0c6SHong Zhang } 169790ace30eSBarry Smith 1698baa3c1c6SHong Zhang PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A) 1699baa3c1c6SHong Zhang { 1700baa3c1c6SHong Zhang PetscErrorCode ierr; 1701baa3c1c6SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1702baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = a->atbdense; 1703baa3c1c6SHong Zhang 1704baa3c1c6SHong Zhang PetscFunctionBegin; 1705c5ef1628SHong Zhang ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr); 1706baa3c1c6SHong Zhang ierr = (atb->destroy)(A);CHKERRQ(ierr); 1707baa3c1c6SHong Zhang ierr = PetscFree(atb);CHKERRQ(ierr); 1708baa3c1c6SHong Zhang PetscFunctionReturn(0); 1709baa3c1c6SHong Zhang } 1710baa3c1c6SHong Zhang 1711cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1712cb20be35SHong Zhang { 1713baa3c1c6SHong Zhang Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 1714660d5466SHong Zhang Mat_SeqDense *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data; 1715baa3c1c6SHong Zhang Mat_TransMatMultDense *atb = c->atbdense; 1716cb20be35SHong Zhang PetscErrorCode ierr; 1717cb20be35SHong Zhang MPI_Comm comm; 1718d5017740SHong Zhang PetscMPIInt rank,size,*recvcounts=atb->recvcounts; 1719c5ef1628SHong Zhang PetscScalar *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf; 1720d5017740SHong Zhang PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j; 1721660d5466SHong Zhang PetscScalar _DOne=1.0,_DZero=0.0; 1722adc7a786SHong Zhang PetscBLASInt am,an,bn,aN; 1723e68c0b26SHong Zhang const PetscInt *ranges; 1724cb20be35SHong Zhang 1725cb20be35SHong Zhang PetscFunctionBegin; 1726cb20be35SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1727cb20be35SHong Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1728cb20be35SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1729e68c0b26SHong Zhang 1730c5ef1628SHong Zhang /* compute atbarray = aseq^T * bseq */ 1731660d5466SHong Zhang ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr); 1732660d5466SHong Zhang ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr); 1733660d5466SHong Zhang ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr); 1734adc7a786SHong Zhang ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr); 1735adc7a786SHong Zhang PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN)); 1736cb20be35SHong Zhang 1737cb20be35SHong Zhang ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr); 1738c5ef1628SHong Zhang for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 1739cb20be35SHong Zhang 1740660d5466SHong Zhang /* arrange atbarray into sendbuf */ 1741cb20be35SHong Zhang k = 0; 1742cb20be35SHong Zhang for (proc=0; proc<size; proc++) { 1743baa3c1c6SHong Zhang for (j=0; j<cN; j++) { 1744c5ef1628SHong Zhang for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM]; 1745cb20be35SHong Zhang } 1746cb20be35SHong Zhang } 1747c5ef1628SHong Zhang /* sum all atbarray to local values of C */ 1748660d5466SHong Zhang ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr); 17493462b7efSHong Zhang ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr); 1750660d5466SHong Zhang ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr); 1751cb20be35SHong Zhang PetscFunctionReturn(0); 1752cb20be35SHong Zhang } 1753cb20be35SHong Zhang 1754cb20be35SHong Zhang PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1755cb20be35SHong Zhang { 1756cb20be35SHong Zhang PetscErrorCode ierr; 1757baa3c1c6SHong Zhang Mat Cdense; 1758cb20be35SHong Zhang MPI_Comm comm; 1759baa3c1c6SHong Zhang PetscMPIInt size; 1760660d5466SHong Zhang PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 1761baa3c1c6SHong Zhang Mat_MPIDense *c; 1762baa3c1c6SHong Zhang Mat_TransMatMultDense *atb; 1763cb20be35SHong Zhang 1764cb20be35SHong Zhang PetscFunctionBegin; 1765baa3c1c6SHong Zhang ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1766cb20be35SHong Zhang if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 1767cb20be35SHong Zhang SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1768cb20be35SHong Zhang } 1769cb20be35SHong Zhang 1770baa3c1c6SHong Zhang /* create matrix product Cdense */ 1771baa3c1c6SHong Zhang ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr); 1772660d5466SHong Zhang ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1773baa3c1c6SHong Zhang ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr); 1774baa3c1c6SHong Zhang ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr); 1775baa3c1c6SHong Zhang ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1776baa3c1c6SHong Zhang ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1777baa3c1c6SHong Zhang *C = Cdense; 1778baa3c1c6SHong Zhang 1779baa3c1c6SHong Zhang /* create data structure for reuse Cdense */ 1780baa3c1c6SHong Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1781baa3c1c6SHong Zhang ierr = PetscNew(&atb);CHKERRQ(ierr); 1782660d5466SHong Zhang cM = Cdense->rmap->N; 1783c5ef1628SHong Zhang ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr); 1784baa3c1c6SHong Zhang 1785baa3c1c6SHong Zhang c = (Mat_MPIDense*)Cdense->data; 1786baa3c1c6SHong Zhang c->atbdense = atb; 1787baa3c1c6SHong Zhang atb->destroy = Cdense->ops->destroy; 1788baa3c1c6SHong Zhang Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 1789cb20be35SHong Zhang PetscFunctionReturn(0); 1790cb20be35SHong Zhang } 1791cb20be35SHong Zhang 1792cb20be35SHong Zhang PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1793cb20be35SHong Zhang { 1794cb20be35SHong Zhang PetscErrorCode ierr; 1795cb20be35SHong Zhang 1796cb20be35SHong Zhang PetscFunctionBegin; 1797cb20be35SHong Zhang if (scall == MAT_INITIAL_MATRIX) { 1798cb20be35SHong Zhang ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 1799cb20be35SHong Zhang } 1800cb20be35SHong Zhang ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 1801cb20be35SHong Zhang PetscFunctionReturn(0); 1802cb20be35SHong Zhang } 1803320f2790SHong Zhang 1804320f2790SHong Zhang PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A) 1805320f2790SHong Zhang { 1806320f2790SHong Zhang PetscErrorCode ierr; 1807320f2790SHong Zhang Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1808320f2790SHong Zhang Mat_MatMultDense *ab = a->abdense; 1809320f2790SHong Zhang 1810320f2790SHong Zhang PetscFunctionBegin; 1811320f2790SHong Zhang ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 1812320f2790SHong Zhang ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 1813320f2790SHong Zhang ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 1814320f2790SHong Zhang 1815320f2790SHong Zhang ierr = (ab->destroy)(A);CHKERRQ(ierr); 1816320f2790SHong Zhang ierr = PetscFree(ab);CHKERRQ(ierr); 1817320f2790SHong Zhang PetscFunctionReturn(0); 1818320f2790SHong Zhang } 1819320f2790SHong Zhang 18205242a7b1SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL) 1821320f2790SHong Zhang PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 1822320f2790SHong Zhang { 1823320f2790SHong Zhang PetscErrorCode ierr; 1824320f2790SHong Zhang Mat_MPIDense *c=(Mat_MPIDense*)C->data; 1825320f2790SHong Zhang Mat_MatMultDense *ab=c->abdense; 1826320f2790SHong Zhang 1827320f2790SHong Zhang PetscFunctionBegin; 1828de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 1829de0a22f0SHong Zhang ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 1830320f2790SHong Zhang ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 1831de0a22f0SHong Zhang ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 1832320f2790SHong Zhang PetscFunctionReturn(0); 1833320f2790SHong Zhang } 1834320f2790SHong Zhang 1835320f2790SHong Zhang PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) 1836320f2790SHong Zhang { 1837320f2790SHong Zhang PetscErrorCode ierr; 1838320f2790SHong Zhang Mat Ae,Be,Ce; 1839320f2790SHong Zhang Mat_MPIDense *c; 1840320f2790SHong Zhang Mat_MatMultDense *ab; 1841320f2790SHong Zhang 1842320f2790SHong Zhang PetscFunctionBegin; 1843320f2790SHong Zhang if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 1844378336b6SHong Zhang SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 1845320f2790SHong Zhang } 1846320f2790SHong Zhang 1847320f2790SHong Zhang /* convert A and B to Elemental matrices Ae and Be */ 1848320f2790SHong Zhang ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr); 1849320f2790SHong Zhang ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr); 1850320f2790SHong Zhang 1851320f2790SHong Zhang /* Ce = Ae*Be */ 1852320f2790SHong Zhang ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr); 1853320f2790SHong Zhang ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr); 1854320f2790SHong Zhang 1855320f2790SHong Zhang /* convert Ce to C */ 1856320f2790SHong Zhang ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr); 1857320f2790SHong Zhang 1858320f2790SHong Zhang /* create data structure for reuse Cdense */ 1859320f2790SHong Zhang ierr = PetscNew(&ab);CHKERRQ(ierr); 1860320f2790SHong Zhang c = (Mat_MPIDense*)(*C)->data; 1861320f2790SHong Zhang c->abdense = ab; 1862320f2790SHong Zhang 1863320f2790SHong Zhang ab->Ae = Ae; 1864320f2790SHong Zhang ab->Be = Be; 1865320f2790SHong Zhang ab->Ce = Ce; 1866320f2790SHong Zhang ab->destroy = (*C)->ops->destroy; 1867320f2790SHong Zhang (*C)->ops->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 18689775878aSHong Zhang (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 1869320f2790SHong Zhang PetscFunctionReturn(0); 1870320f2790SHong Zhang } 1871320f2790SHong Zhang 1872150d2497SBarry Smith PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 1873320f2790SHong Zhang { 1874320f2790SHong Zhang PetscErrorCode ierr; 1875320f2790SHong Zhang 1876320f2790SHong Zhang PetscFunctionBegin; 1877320f2790SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */ 187857299f2fSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1879320f2790SHong Zhang ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr); 188057299f2fSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 1881320f2790SHong Zhang } else { 188257299f2fSHong Zhang ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1883320f2790SHong Zhang ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr); 188457299f2fSHong Zhang ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 1885320f2790SHong Zhang } 1886320f2790SHong Zhang PetscFunctionReturn(0); 1887320f2790SHong Zhang } 18885242a7b1SHong Zhang #endif 1889