xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision 9ae82921df069a58776bfe4da82b38e8ff7dd41c)
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
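/* Preallocation for the MPIAIJCUSPARSE type: apply default nonzero counts, validate the
   per-row counts, and create the diagonal (A) and off-diagonal (B) blocks as
   MATSEQAIJCUSPARSE matrices before preallocating them. */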
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the two sequential blocks (diagonal and off-diagonal) as MATSEQAIJCUSPARSE matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
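/* Create GPU (VECCUSP) work vectors compatible with the matrix: the right vector shares the
   column layout and the left vector shares the row layout. */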
PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#ifdef PETSC_HAVE_TXPETSCGPU
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version.
  // The diagonal-block multiplication kernel is launched in one
  // stream. In a separate stream, the DeviceToHost transfer, the
  // MPI messaging, and the HostToDevice transfer of the off-process
  // entries are launched. Once the data-transfer stream has been
  // synchronized, to ensure the messaging is complete, the MatMultAdd
  // kernel is launched in the original (MatMult) stream to protect
  // against race conditions.
  //
  // This sequence should only be used for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
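  /* Set up the GPU-aware scatter, then launch the diagonal-block multiply in the MatMult stream */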
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
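  /* In a separate stream: DeviceToHost transfer, MPI messaging, and HostToDevice transfer of the off-process entries */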
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
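  /* The scatter is complete, so the off-diagonal multiply-add can run in the MatMult stream without a race */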
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_MPIAIJ(Mat);
EXTERN_C_END
//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
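/* Constructor for MATMPIAIJCUSPARSE: create an MPIAIJ matrix, then override the preallocation,
   getvecs, and (when TXPETSCGPU support is available) mult operations with the CUSPARSE/GPU versions. */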
PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  //B->ops->setvaluesbatch = MatSetValuesBatch_MPIAIJCUSPARSE;
#ifdef PETSC_HAVE_TXPETSCGPU
  B->ops->mult        = MatMult_MPIAIJCUSPARSE;
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
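/* Convenience constructor: create a matrix of type MATMPIAIJCUSPARSE or MATSEQAIJCUSPARSE,
   depending on the size of the communicator, and preallocate it with the given nonzero estimates. */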
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices;
   matrix-vector products are performed on the GPU using the CUSPARSE library.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise.  As a result, MatSeqAIJSetPreallocation is supported for single
   process communicators, and similarly MatMPIAIJSetPreallocation is supported for communicators
   controlling multiple processes.  It is recommended that you call both of the above preallocation
   routines for simplicity.

   Options Database Keys:
. -mat_type aijcusparse - sets the matrix type to "aijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateAIJCUSPARSE(), MatCreateMPIAIJ, MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
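
/*
   A minimal usage sketch (illustrative only, not part of this file's interface; the
   communicator, global size, and per-row nonzero estimates below are placeholders):

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                                 5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   MatCreateAIJCUSPARSE() selects the sequential or parallel CUSPARSE type based on the
   communicator size and performs the preallocation in a single call; alternatively, call
   MatCreate()/MatSetType() followed by the preallocation routines named above.
*/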