#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the 2 MATSEQAIJCUSPARSE matrices for the diagonal and off-diagonal blocks. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#ifdef PETSC_HAVE_TXPETSCGPU
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version.
  // In particular, the diagonal block multiplication kernel is
  // launched in one stream.  Then, in a separate stream, the
  // DeviceToHost transfers (with MPI messaging in between) and
  // the HostToDevice transfers are launched.
  // Once the data transfer stream is synchronized, to ensure
  // messaging is complete, the MatMultAdd kernel is launched in
  // the original (MatMult) stream to protect against race
  // conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

EXTERN_C_BEGIN
PetscErrorCode MatCreate_MPIAIJ(Mat);
EXTERN_C_END

//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs = MatGetVecs_MPIAIJCUSPARSE;
  //B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSPARSE;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult    = MatMult_MPIAIJCUSPARSE;
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single-process communicator,
   and MATMPIAIJCUSPARSE otherwise.  As a result, MatSeqAIJSetPreallocation is supported for single-process
   communicators, and MatMPIAIJSetPreallocation is supported for communicators controlling multiple
   processes.  It is recommended that you call both of the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
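
/*
   Example (a minimal usage sketch, not part of the library source): creating a
   distributed AIJCUSPARSE matrix with rough preallocation estimates and then
   destroying it.  The local sizes and per-row nonzero counts below are
   illustrative assumptions, not values taken from this file.

     Mat      A;
     PetscInt m = 100, n = 100;    // local row and column sizes on this process
     ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                 5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);  // ~5 diagonal, ~2 off-diagonal nonzeros per row
     // ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/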