xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision 045c96e11cb246ed6b25ad49b4bf3c1c90a169b1)
#include "petscconf.h"
PETSC_CUDA_EXTERN_C_BEGIN
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
PETSC_CUDA_EXTERN_C_END
#include "mpicusparsematimpl.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ         *b              = (Mat_MPIAIJ*)B->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)b->spptr;
  PetscErrorCode     ierr;
  PetscInt           i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJCUSPARSE matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->A,cusparseStruct->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->B,cusparseStruct->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
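
/* A minimal usage sketch (not from this file): how a caller typically reaches
   the preallocation routine above on a parallel communicator. The global size
   and the per-row nonzero estimates (5 in the diagonal block, 2 in the
   off-diagonal block) are illustrative values only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,1000,1000);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/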
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. In
  // particular, the diagonal-block multiplication kernel is launched in one
  // stream. Then, in a separate stream, the DeviceToHost transfers (with
  // MPI messaging in between) and the subsequent HostToDevice transfers are
  // launched. Once the data-transfer stream is synchronized, ensuring the
  // messaging is complete, the MatMultAdd kernel is launched in the
  // original (MatMult) stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

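/* A minimal sketch (not from this file) of driving the overlapped MatMult
   above with GPU vectors. MatGetVecs() dispatches to
   MatGetVecs_MPIAIJCUSPARSE() and returns VECCUSP vectors with a layout
   compatible with A; A is assumed to be already assembled.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);  // x: right vector, y: left vector
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);       // y = A*x via the sequence above
*/
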
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJCUSPARSE"
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. The
  // off-diagonal-block transpose multiplication into a->lvec and the
  // diagonal-block transpose multiplication into yy are launched on the
  // GPU; the partial results in a->lvec are then communicated with a
  // reverse scatter (DeviceToHost transfers, MPI messaging, HostToDevice
  // transfers) and added into yy.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->rmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,a->lvec,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
  switch (op) {
  case MAT_DIAGBLOCK_CSR:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_CSR;
    break;
  case MAT_OFFDIAGBLOCK_CSR:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;
    break;
  case MAT_DIAGBLOCK_DIA:
  case MAT_OFFDIAGBLOCK_DIA:
  case MAT_DIA:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
  case MAT_DIAGBLOCK_ELL:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_ELL;
    break;
  case MAT_OFFDIAGBLOCK_ELL:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_ELL;
    break;
  case MAT_DIAGBLOCK_HYB:
    cusparseStruct->diagGPUMatFormat = MAT_DIAGBLOCK_HYB;
    break;
  case MAT_OFFDIAGBLOCK_HYB:
    cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_HYB;
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}

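/* A minimal sketch (not from this file): selecting ELL storage for the
   diagonal block and HYB for the off-diagonal block through MatSetOption(),
   which dispatches to the routine above. The MAT_DIAGBLOCK_*/
/* MAT_OFFDIAGBLOCK_* options are assumed to come from the TxPETSCGPU headers
   pulled in via mpicusparsematimpl.h.

     ierr = MatSetOption(A,MAT_DIAGBLOCK_ELL,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_OFFDIAGBLOCK_HYB,PETSC_TRUE);CHKERRQ(ierr);
*/
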
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
{
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  PetscErrorCode     ierr;
  PetscInt           idxDiag=0,idxOffDiag=0;
  const char         *formats[]={CSR,ELL,HYB};
  MatOption          diagFormat=MAT_CSR,offdiagFormat=MAT_CSR;
  PetscBool          flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype==MAT_FACTOR_NONE) {
    ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);

    /* PetscOptionsEList() returns the index into the formats array:
       0 = CSR, 1 = ELL, 2 = HYB. */
    switch (idxDiag) {
    case 0:
      diagFormat = MAT_CSR;
      break;
    case 1:
      diagFormat = MAT_ELL;
      break;
    case 2:
      diagFormat = MAT_HYB;
      break;
    }

    switch (idxOffDiag) {
    case 0:
      offdiagFormat = MAT_CSR;
      break;
    case 1:
      offdiagFormat = MAT_ELL;
      break;
    case 2:
      offdiagFormat = MAT_HYB;
      break;
    }
    cusparseStruct->diagGPUMatFormat    = diagFormat;
    cusparseStruct->offdiagGPUMatFormat = offdiagFormat;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

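/* A command-line sketch (hedged): the same storage formats can be chosen at
   runtime; the tokens below assume the TxPETSCGPU format strings are "csr",
   "ell", and "hyb".

     ./app -mat_type mpiaijcusparse \
           -mat_mult_cusparse_diag_storage_format ell \
           -mat_mult_cusparse_offdiag_storage_format csr
*/
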
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJCUSPARSE"
PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  try {
    delete cusparseStruct;
  } catch(char *ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s",ex);
  }
  cusparseStruct = 0;
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a;
  Mat_MPIAIJCUSPARSE *cusparseStruct;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                           MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  a              = (Mat_MPIAIJ*)A->data;
  a->spptr       = new Mat_MPIAIJCUSPARSE;
  cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  cusparseStruct->diagGPUMatFormat    = MAT_DIAGBLOCK_CSR;
  cusparseStruct->offdiagGPUMatFormat = MAT_OFFDIAGBLOCK_CSR;

  A->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  A->ops->mult           = MatMult_MPIAIJCUSPARSE;
  A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  A->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
  A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

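/* A minimal sketch (not from this file): convenience creation through
   MatCreateAIJCUSPARSE() above, which selects MATMPIAIJCUSPARSE on more than
   one rank and MATSEQAIJCUSPARSE on one rank. The sizes and nonzero
   estimates are illustrative.

     Mat A;
     ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                                 1000,1000,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/
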
/*MC
   MATMPIAIJCUSPARSE - MATMPIAIJCUSPARSE = "mpiaijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateAIJCUSPARSE(), MATSEQAIJCUSPARSE, MATSEQAIJ, MATMPIAIJ
M*/