xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision ca45077f9abf8e7344aac749d49eef6cfd88f621)
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE"
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create the two MATSEQAIJCUSPARSE matrices for the diagonal and off-diagonal blocks. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->A,b->diagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetOption_SeqAIJCUSPARSE(b->B,b->offdiagGPUMatFormat,PETSC_TRUE);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
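
/*
   Illustrative usage sketch (not part of the library source; names and counts
   are hypothetical): preallocating an MPIAIJCUSPARSE matrix.  The scalar
   d_nz/o_nz arguments give one nonzero count for every local row of the
   diagonal and off-diagonal blocks; the optional d_nnz/o_nnz arrays (length =
   number of local rows) specify the counts row by row instead.

     Mat            B;
     PetscInt       nlocal = 100,i;        // local rows, hypothetical
     PetscInt       *dnnz,*onnz;
     PetscErrorCode ierr;

     ierr = PetscMalloc(nlocal*sizeof(PetscInt),&dnnz);CHKERRQ(ierr);
     ierr = PetscMalloc(nlocal*sizeof(PetscInt),&onnz);CHKERRQ(ierr);
     for (i=0; i<nlocal; i++) {dnnz[i] = 5; onnz[i] = 2;}
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,0,dnnz,0,onnz);CHKERRQ(ierr);
     ierr = PetscFree(dnnz);CHKERRQ(ierr);
     ierr = PetscFree(onnz);CHKERRQ(ierr);
*/
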
#undef __FUNCT__
#define __FUNCT__ "MatGetVecs_MPIAIJCUSPARSE"
PetscErrorCode  MatGetVecs_MPIAIJCUSPARSE(Mat mat,Vec *right,Vec *left)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (right) {
    ierr = VecCreate(((PetscObject)mat)->comm,right);CHKERRQ(ierr);
    ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*right,mat->cmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*right,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr);
  }
  if (left) {
    ierr = VecCreate(((PetscObject)mat)->comm,left);CHKERRQ(ierr);
    ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
    ierr = VecSetType(*left,VECCUSP);CHKERRQ(ierr);
    ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
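
/*
   Illustrative usage sketch (assumes an already assembled Mat A of this type):
   MatGetVecs() hands back VECCUSP vectors whose layouts are shared with the
   matrix column map (right vector, the one being multiplied) and row map
   (left vector, the result), so they can be passed directly to MatMult().

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);   // x matches columns of A, y matches rows of A
     ...
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/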


#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJCUSPARSE"
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. In
  // particular, the diagonal block multiplication kernel is launched
  // in one stream. Then, in a separate stream, the DeviceToHost data
  // transfers (with MPI messaging in between) and the subsequent
  // HostToDevice transfers are launched. Once the data transfer stream
  // is synchronized, to ensure the messaging is complete, the MatMultAdd
  // kernel is launched in the original (MatMult) stream to protect
  // against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
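
/*
   Illustrative driver sketch for the routine above (matrix A and vectors x,y
   are assumed to exist and be filled elsewhere): once the matrix is assembled,
   MatMult() dispatches here, launching the diagonal-block kernel while the
   off-diagonal scatter is still in flight, as described in the comment above.

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);        // y = A x on the GPU
*/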

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJCUSPARSE"
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  // This multiplication sequence differs from the CPU version. In
  // particular, the diagonal block multiplication kernel is launched
  // in one stream. Then, in a separate stream, the DeviceToHost data
  // transfers (with MPI messaging in between) and the subsequent
  // HostToDevice transfers are launched. Once the data transfer stream
  // is synchronized, to ensure the messaging is complete, the
  // MatMultTransposeAdd kernel is launched in the original
  // (MatMultTranspose) stream to protect against race conditions.
  //
  // This sequence should only be called for GPU computation.
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,xx,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multtransposeadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
PetscErrorCode  MatCreate_MPIAIJ(Mat);
EXTERN_C_END
//PetscErrorCode MatSetValuesBatch_MPIAIJCUSPARSE(Mat J, PetscInt Ne, PetscInt Nl, PetscInt *elemRows, const PetscScalar *elemMats);


#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJCUSPARSE"
PetscErrorCode MatSetOption_MPIAIJCUSPARSE(Mat A,MatOption op,PetscBool  flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption_MPIAIJ(A,op,flg);CHKERRQ(ierr);
  switch (op) {
  case DIAGBLOCK_MAT_CSR:
    a->diagGPUMatFormat = DIAGBLOCK_MAT_CSR;
    break;
  case OFFDIAGBLOCK_MAT_CSR:
    a->offdiagGPUMatFormat = OFFDIAGBLOCK_MAT_CSR;
    break;
  case DIAGBLOCK_MAT_DIA:
  case OFFDIAGBLOCK_MAT_DIA:
  case MAT_DIA:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported GPU matrix storage format DIA for (MPI,SEQ)AIJCUSPARSE matrix type.");
  case DIAGBLOCK_MAT_ELL:
    a->diagGPUMatFormat = DIAGBLOCK_MAT_ELL;
    break;
  case OFFDIAGBLOCK_MAT_ELL:
    a->offdiagGPUMatFormat = OFFDIAGBLOCK_MAT_ELL;
    break;
  case DIAGBLOCK_MAT_HYB:
    a->diagGPUMatFormat = DIAGBLOCK_MAT_HYB;
    break;
  case OFFDIAGBLOCK_MAT_HYB:
    a->offdiagGPUMatFormat = OFFDIAGBLOCK_MAT_HYB;
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
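
/*
   Illustrative sketch: in this TxPETSCGPU-based build the GPU storage format
   of each block is selected through MatSetOption() with the DIAGBLOCK and
   OFFDIAGBLOCK MatOption values handled above (matrix A is assumed to already
   be of type MPIAIJCUSPARSE):

     ierr = MatSetOption(A,DIAGBLOCK_MAT_HYB,PETSC_TRUE);CHKERRQ(ierr);     // HYB diagonal block
     ierr = MatSetOption(A,OFFDIAGBLOCK_MAT_ELL,PETSC_TRUE);CHKERRQ(ierr);  // ELL off-diagonal block

   In this file the stored formats are applied to the sequential blocks inside
   MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(), so set them before preallocation.
*/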


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetFromOptions_MPIAIJCUSPARSE"
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       idxDiag=0,idxOffDiag=0;
  const char     *formats[]={CSR,ELL,HYB};
  MatOption      diagFormat=MAT_CSR,offdiagFormat=MAT_CSR;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"When using TxPETSCGPU, MPIAIJCUSPARSE Options","Mat");CHKERRQ(ierr);
  if (A->factortype==MAT_FACTOR_NONE) {
    ierr = PetscOptionsEList("-mat_mult_cusparse_diag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxDiag,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEList("-mat_mult_cusparse_offdiag_storage_format",
                             "Set the storage format of (mpi)aijcusparse gpu matrices for SpMV",
                             "None",formats,3,formats[0],&idxOffDiag,&flg);CHKERRQ(ierr);

    //printf("MatSetFromOptions_MPIAIJCUSPARSE : %s %s\n",formats[idxDiag],formats[idxOffDiag]);
    /* Map the selected list index back to the corresponding MatOption;
       comparing indices avoids relying on string-literal pointer identity. */
    if (idxDiag == 0)      diagFormat = MAT_CSR;
    else if (idxDiag == 1) diagFormat = MAT_ELL;
    else                   diagFormat = MAT_HYB;

    if (idxOffDiag == 0)      offdiagFormat = MAT_CSR;
    else if (idxOffDiag == 1) offdiagFormat = MAT_ELL;
    else                      offdiagFormat = MAT_HYB;

    a->diagGPUMatFormat    = diagFormat;
    a->offdiagGPUMatFormat = offdiagFormat;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
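
/*
   Illustrative command line for the options registered above (hedged: the
   accepted strings are whatever the CSR/ELL/HYB macros in formats[] expand
   to, assumed here to be "csr", "ell" and "hyb"):

     mpiexec -n 4 ./app -mat_type mpiaijcusparse -vec_type cusp \
        -mat_mult_cusparse_diag_storage_format hyb \
        -mat_mult_cusparse_offdiag_storage_format ell

   MatSetFromOptions() reaches MatSetFromOptions_MPIAIJCUSPARSE() and stores
   the chosen per-block GPU storage formats used for SpMV.
*/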


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJCUSPARSE"
PetscErrorCode  MatCreate_MPIAIJCUSPARSE(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIAIJ(B);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJCUSPARSE",
                                      MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  B->ops->getvecs        = MatGetVecs_MPIAIJCUSPARSE;
  B->ops->mult           = MatMult_MPIAIJCUSPARSE;
  B->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  B->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  B->ops->setoption      = MatSetOption_MPIAIJCUSPARSE;
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJCUSPARSE"
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
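
/*
   Illustrative usage sketch of the convenience constructor above (local sizes
   and nonzero counts are hypothetical): it selects MATMPIAIJCUSPARSE or
   MATSEQAIJCUSPARSE from the communicator size and applies the preallocation.

     Mat A;
     ierr = MatCreateAIJCUSPARSE(PETSC_COMM_WORLD,
                                 100,100,                        // local rows and columns
                                 PETSC_DETERMINE,PETSC_DETERMINE,
                                 5,PETSC_NULL,                   // diagonal block: 5 nonzeros per row
                                 2,PETSC_NULL,                   // off-diagonal block: 2 nonzeros per row
                                 &A);CHKERRQ(ierr);
*/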

/*MC
   MATAIJCUSPARSE - MATAIJCUSPARSE = "aijcusparse" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
   and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ(), MATSEQAIJ, MATMPIAIJ, MATMPIAIJCUSPARSE, MATSEQAIJCUSPARSE
M*/
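
/*
   Illustrative sketch of the recommendation above (N and the nonzero counts
   are hypothetical, and MATAIJCUSPARSE is assumed to be registered as
   "aijcusparse" in this build): calling both preallocation routines lets the
   same code run with one process or many, since the routine that does not
   match the actual type is ignored.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJCUSPARSE);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/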