xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision 2327d61d15021df0a914fbb57b0456ca30aa3dd5)
1*2327d61dSSatish Balay #include "petscconf.h"
2*2327d61dSSatish Balay PETSC_CUDA_EXTERN_C_BEGIN
39ae82921SPaul Mullowney #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4*2327d61dSSatish Balay PETSC_CUDA_EXTERN_C_END
5bbf3fe20SPaul Mullowney #include "mpicusparsematimpl.h"
69ae82921SPaul Mullowney 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
/*
   MatMPIAIJSetPreallocation_MPIAIJCUSPARSE - preallocation routine composed onto
   MPIAIJCUSPARSE matrices.  Validates the requested nonzero counts, creates the
   local diagonal (b->A) and off-diagonal (b->B) blocks as MATSEQAIJCUSPARSE
   matrices on first use, preallocates both, and pushes the GPU storage formats
   recorded in the Mat_MPIAIJCUSPARSE struct down onto the two blocks.

   Input Parameters:
     B     - the MPIAIJCUSPARSE matrix
     d_nz  - per-row nonzero estimate for the diagonal block
             (PETSC_DEFAULT/PETSC_DECIDE is replaced by 5)
     d_nnz - optional per-local-row nonzero counts for the diagonal block
     o_nz  - per-row nonzero estimate for the off-diagonal block
             (PETSC_DEFAULT/PETSC_DECIDE is replaced by 2)
     o_nnz - optional per-local-row nonzero counts for the off-diagonal block
*/
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
  Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  /* Substitute fixed defaults when the caller lets PETSc decide. */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  /* Layouts must be final before the local row count B->rmap->n is trusted below. */
  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* Reject negative per-row counts; d_nnz is checked in full before o_nnz. */
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJCUSPARSE matrices:
       b->A is the square local diagonal block (n x n local columns),
       b->B holds the off-diagonal part (local rows x global columns). */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Propagate the GPU storage formats chosen for this MPI matrix to both blocks. */
  ierr=MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  ierr=MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
/*
   MatGetVecs_MPIAIJCUSPARSE - creates CUSP vectors conforming to the matrix:
   *right is compatible with the columns (sized by mat->cmap) and *left with
   the rows (sized by mat->rmap).  Either output pointer may be PETSC_NULL.

   Fix: the right vector's block size previously used mat->rmap->bs; a
   column-compatible vector must take its block size from the column layout.
*/
PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    /* share the matrix column layout instead of building a new one */
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    /* share the matrix row layout instead of building a new one */
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
799ae82921SPaul Mullowney 
809ae82921SPaul Mullowney 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // Computes yy = A*xx for a GPU-resident MPIAIJ matrix:
  //   1. launch the diagonal-block product yy = A_d * xx,
  //   2. gather the off-process entries of xx into a->lvec
  //      (device-to-host copy, MPI exchange, host-to-device copy),
  //   3. apply the off-diagonal block with a multiply-add: yy += A_o * lvec.
  // NOTE(review): the surrounding scatter helpers are said to run the
  // transfers in a separate CUDA stream that is synchronized before the
  // MatMultAdd launch, protecting against races; that behavior lives inside
  // VecScatterInitializeForGPU/VecScatterFinalizeForGPU and cannot be
  // confirmed from this file.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  // xx is column-compatible: its local length must match the local column count.
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
111ca45077fSPaul Mullowney 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJCUSPARSE"
/*
   MatMultTranspose_MPIAIJCUSPARSE - computes yy = A^T * xx on the GPU.

   Fixes relative to the previous revision:
   - __FUNCT__ incorrectly named "MatMult_MPIAIJCUSPARSE".
   - The input xx of a transpose product is row-compatible, so its local
     length must match A->rmap->n (the old check used A->cmap->n).
   - The communication pattern was copied from MatMult (forward scatter of xx
     followed by multtransposeadd on lvec), which computes the wrong result.
     The transpose product follows MatMultTranspose_MPIAIJ: the off-diagonal
     block's transpose contribution B^T*xx targets *remote* rows, so it is
     computed into the work vector lvec and sent back with a reverse
     ADD_VALUES scatter, overlapped with the local diagonal-block product.
*/
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->rmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,a->lvec,SCATTER_REVERSE);CHKERRQ(ierr);
  /* off-diagonal contribution into the work vector */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* start sending the off-process contributions on their way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* overlap communication with the local diagonal-block product */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* complete the reverse scatter, adding remote contributions into yy */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1429ae82921SPaul Mullowney 
1439ae82921SPaul Mullowney //PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);
1449ae82921SPaul Mullowney 
145ca45077fSPaul Mullowney 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
/*
   MatSetOption_MPIAIJCUSPARSE - forwards the option to the base MPIAIJ
   handler, then records any GPU storage-format option (CSR/ELL/HYB for the
   diagonal or off-diagonal block) in the Mat_MPIAIJCUSPARSE struct.  The DIA
   formats are rejected; every other option is handled by the base class only.
   Note: the format is recorded regardless of the value of flg, matching the
   original behavior.
*/
PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ         *mpiaij = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusp   = (Mat_MPIAIJCUSPARSE*)mpiaij->spptr;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
  if (op == MAT_DIAGBLOCK_CSR)          cusp->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
  else if (op == MAT_OFFDIAGBLOCK_CSR)  cusp->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
  else if (op == MAT_DIAGBLOCK_ELL)     cusp->diagGPUMatFormat    = MAT_DIAGBLOCK_ELL;
  else if (op == MAT_OFFDIAGBLOCK_ELL)  cusp->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL;
  else if (op == MAT_DIAGBLOCK_HYB)     cusp->diagGPUMatFormat    = MAT_DIAGBLOCK_HYB;
  else if (op == MAT_OFFDIAGBLOCK_HYB)  cusp->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB;
  else if (op == MAT_DIAGBLOCK_DIA || op == MAT_OFFDIAGBLOCK_DIA || op == MAT_DIA) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
  }
  PetscFunctionReturn(0);
}
184ca45077fSPaul Mullowney 
185ca45077fSPaul Mullowney 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
/*
   MatSetFromOptions_MPIAIJCUSPARSE - reads the GPU storage-format options for
   the diagonal and off-diagonal blocks from the options database and records
   them in the Mat_MPIAIJCUSPARSE struct (only for unfactored matrices).

   Fixes relative to the previous revision:
   - The selected formats were stored as MAT_CSR/MAT_ELL/MAT_HYB, while every
     other site in this file (MatCreate_, MatSetOption_) uses the
     MAT_DIAGBLOCK_*/MAT_OFFDIAGBLOCK_* values; the per-block values are now
     stored so MatMPIAIJSetPreallocation_ forwards consistent formats.
   - The translation used pointer comparison on the CSR/ELL/HYB string macros
     and left the format variables uninitialized on the fall-through path; the
     list index returned by PetscOptionsEList is now switched on directly.
*/
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;
  PetscInt           idxDiag=0,idxOffDiag=0;
  const char         *formats[]={CSR,ELL,HYB};
  PetscBool          flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype==MAT_FACTOR_NONE) {
    ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);

    /* formats[] is {CSR,ELL,HYB}; map the selected index (default 0 == CSR)
       onto the per-block format enums. */
    switch (idxDiag) {
    case 1:  cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL; break;
    case 2:  cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB; break;
    default: cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR; break;
    }
    switch (idxOffDiag) {
    case 1:  cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL; break;
    case 2:  cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB; break;
    default: cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR; break;
    }
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
229ca45077fSPaul Mullowney 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE"
/*
   MatDestroy_MPIAIJCUSPARSE - frees the CUSPARSE-specific struct hanging off
   spptr, then delegates to the base MatDestroy_MPIAIJ.

   Fix: the previous revision zeroed the local pointer (`cusparseStruct = 0`),
   a dead store; the struct member a->spptr is now cleared so it is not left
   dangling while MatDestroy_MPIAIJ runs.
*/
PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a  = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE * cusparseStruct  = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  try {
    delete cusparseStruct;
  } catch(char* ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex);
  }
  a->spptr = 0;
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
250ca45077fSPaul Mullowney 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
/*
   MatCreate_MPIAIJCUSPARSE - type constructor: builds a standard MPIAIJ
   matrix, attaches the CUSPARSE-specific struct (defaulting both blocks to
   CSR storage), composes the preallocation routine, and overrides the ops
   table entries implemented in this file.
*/
PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *mpiaij;
  Mat_MPIAIJCUSPARSE *cusp;

  PetscFunctionBegin;
  /* start from the plain MPIAIJ implementation */
  ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C",
					   "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
					   MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  /* attach the GPU-format bookkeeping struct with CSR defaults */
  mpiaij        = (Mat_MPIAIJ*)A->data;
  cusp          = new Mat_MPIAIJCUSPARSE;
  mpiaij->spptr = cusp;
  cusp->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
  cusp->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
  /* override the operations this type reimplements */
  A->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  A->ops->mult           = MatMult_MPIAIJCUSPARSE;
  A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  A->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
  A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2809ae82921SPaul Mullowney 
2819ae82921SPaul Mullowney 
#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
/*
   MatCreateAIJCUSPARSE - convenience constructor for a CUSPARSE AIJ matrix.
   Picks MATSEQAIJCUSPARSE on a single-process communicator and
   MATMPIAIJCUSPARSE otherwise, then applies the matching preallocation with
   the supplied (d_nz,d_nnz) and, for the parallel case, (o_nz,o_nnz).
*/
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3029ae82921SPaul Mullowney 
3039ae82921SPaul Mullowney /*MC
   MATMPIAIJCUSPARSE - MATMPIAIJCUSPARSE = "mpiaijcusparse" - A matrix type to be used for sparse matrices.
3059ae82921SPaul Mullowney 
3069ae82921SPaul Mullowney    This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
3079ae82921SPaul Mullowney    and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
3089ae82921SPaul Mullowney    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
3099ae82921SPaul Mullowney    for communicators controlling multiple processes.  It is recommended that you call both of
3109ae82921SPaul Mullowney    the above preallocation routines for simplicity.
3119ae82921SPaul Mullowney 
3129ae82921SPaul Mullowney    Options Database Keys:
3139ae82921SPaul Mullowney . -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()
3149ae82921SPaul Mullowney 
3159ae82921SPaul Mullowney   Level: beginner
3169ae82921SPaul Mullowney 
3179ae82921SPaul Mullowney .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
3189ae82921SPaul Mullowney M*/
3199ae82921SPaul Mullowney 
320