xref: /petsc/src/mat/impls/aij/mpi/mpicusparse/mpiaijcusparse.cu (revision 050356708b6b130533a9df91dcd046d0f24f763b)
1dced61a5SBarry Smith #define PETSC_SKIP_SPINLOCK
253800007SKarl Rupp #define PETSC_SKIP_CXX_COMPLEX_FIX
399acd6aaSStefano Zampini #define PETSC_SKIP_IMMINTRIN_H_CUDAWORKAROUND 1
40fd2b57fSKarl Rupp 
53d13b8fdSMatthew G. Knepley #include <petscconf.h>
69ae82921SPaul Mullowney #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
757d48284SJunchao Zhang #include <../src/mat/impls/aij/seq/seqcusparse/cusparsematimpl.h>
83d13b8fdSMatthew G. Knepley #include <../src/mat/impls/aij/mpi/mpicusparse/mpicusparsematimpl.h>
99ae82921SPaul Mullowney 
/* Preallocates the diagonal and off-diagonal MATSEQAIJCUSPARSE blocks of an
   MPIAIJCUSPARSE matrix and propagates the GPU storage format, cuSPARSE handle,
   and CUDA stream from the parallel matrix down to both blocks. */
PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJCUSPARSE(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ         *mpiaij = (Mat_MPIAIJ*)B->data;
  Mat_MPIAIJCUSPARSE *cusp   = (Mat_MPIAIJCUSPARSE*)mpiaij->spptr;
  PetscErrorCode     ierr;
  PetscInt           row;

  PetscFunctionBegin;
  /* Fix the row/column layouts before validating the per-row counts */
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* Negative counts are user errors for the diagonal block ... */
  if (d_nnz) {
    for (row=0; row<B->rmap->n; row++) {
      if (d_nnz[row] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",row,d_nnz[row]);
    }
  }
  /* ... and for the off-diagonal block */
  if (o_nnz) {
    for (row=0; row<B->rmap->n; row++) {
      if (o_nnz[row] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",row,o_nnz[row]);
    }
  }
  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJCUSPARSE matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr);
    ierr = MatBindToCPU(mpiaij->A,B->boundtocpu);CHKERRQ(ierr);
    ierr = MatSetSizes(mpiaij->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(mpiaij->A,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)mpiaij->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr);
    ierr = MatBindToCPU(mpiaij->B,B->boundtocpu);CHKERRQ(ierr);
    ierr = MatSetSizes(mpiaij->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mpiaij->B,MATSEQAIJCUSPARSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)mpiaij->B);CHKERRQ(ierr);
  }
  ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr);
  /* Both blocks share the format/handle/stream stored on the parallel matrix */
  ierr = MatCUSPARSESetFormat(mpiaij->A,MAT_CUSPARSE_MULT,cusp->diagGPUMatFormat);CHKERRQ(ierr);
  ierr = MatCUSPARSESetFormat(mpiaij->B,MAT_CUSPARSE_MULT,cusp->offdiagGPUMatFormat);CHKERRQ(ierr);
  ierr = MatCUSPARSESetHandle(mpiaij->A,cusp->handle);CHKERRQ(ierr);
  ierr = MatCUSPARSESetHandle(mpiaij->B,cusp->handle);CHKERRQ(ierr);
  ierr = MatCUSPARSESetStream(mpiaij->A,cusp->stream);CHKERRQ(ierr);
  ierr = MatCUSPARSESetStream(mpiaij->B,cusp->stream);CHKERRQ(ierr);

  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
55e057df02SPaul Mullowney 
/* yy = A*xx.  The forward scatter of xx's off-process entries is started,
   the diagonal-block product runs while the scatter is in flight, and the
   off-diagonal contribution is added after the scatter completes. */
PetscErrorCode MatMult_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nloc;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nloc);CHKERRQ(ierr);
  if (nloc != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nloc);
  ierr = VecScatterInitializeForGPU(mpiaij->Mvctx,xx);CHKERRQ(ierr);
  ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* Local (diagonal-block) product overlaps with the communication above */
  ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* yy += B*lvec: contribution of the off-process columns */
  ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(mpiaij->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
73ca45077fSPaul Mullowney 
/* zz = yy + A*xx, using the same communication/computation overlap as
   MatMult_MPIAIJCUSPARSE: diagonal-block multadd runs while the forward
   scatter of xx is in flight. */
PetscErrorCode MatMultAdd_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nloc;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nloc);CHKERRQ(ierr);
  if (nloc != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nloc);
  ierr = VecScatterInitializeForGPU(mpiaij->Mvctx,xx);CHKERRQ(ierr);
  ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz = yy + A_diag*xx while the scatter is still in progress */
  ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz += B*lvec: contribution of the off-process columns */
  ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(mpiaij->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
91fdc842d1SBarry Smith 
/* yy = A^T * xx.  The off-diagonal transpose product is computed into the
   ghosted work vector lvec first; a reverse scatter with ADD_VALUES then
   accumulates those off-process contributions into yy on top of the
   diagonal-block transpose product. */
PetscErrorCode MatMultTranspose_MPIAIJCUSPARSE(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  /* For a transpose multiply xx must match the row layout of A */
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->rmap->n,nt);
  ierr = VecScatterInitializeForGPU(a->Mvctx,a->lvec);CHKERRQ(ierr);
  /* lvec = B^T * xx: contributions destined for other processes */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* yy = A_diag^T * xx: purely local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* Reverse scatter adds lvec entries into yy across processes */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterFinalizeForGPU(a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1099ae82921SPaul Mullowney 
/* Records the cuSPARSE storage format to be used for SpMV by the diagonal
   block, the off-diagonal block, or both blocks of an MPIAIJCUSPARSE matrix.

   Input Parameters:
+  A      - the matrix
.  op     - which block(s) the format applies to (MAT_CUSPARSE_MULT_DIAG,
            MAT_CUSPARSE_MULT_OFFDIAG, or MAT_CUSPARSE_ALL)
-  format - the requested MatCUSPARSEStorageFormat

   Raises PETSC_ERR_SUP for any other operation value. */
PetscErrorCode MatCUSPARSESetFormat_MPIAIJCUSPARSE(Mat A,MatCUSPARSEFormatOperation op,MatCUSPARSEStorageFormat format)
{
  Mat_MPIAIJ         *a               = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE * cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_CUSPARSE_MULT_DIAG:
    cusparseStruct->diagGPUMatFormat = format;
    break;
  case MAT_CUSPARSE_MULT_OFFDIAG:
    cusparseStruct->offdiagGPUMatFormat = format;
    break;
  case MAT_CUSPARSE_ALL:
    cusparseStruct->diagGPUMatFormat    = format;
    cusparseStruct->offdiagGPUMatFormat = format;
    break;
  default:
    /* BUGFIX: message previously listed MAT_CUSPARSE_MULT_DIAG twice and a
       nonexistent MAT_CUSPARSE_MULT_ALL; it now names the handled cases */
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsupported operation %d for MatCUSPARSEFormatOperation. Only MAT_CUSPARSE_MULT_DIAG, MAT_CUSPARSE_MULT_OFFDIAG, and MAT_CUSPARSE_ALL are currently supported.",op);
  }
  PetscFunctionReturn(0);
}
132e057df02SPaul Mullowney 
/* Processes the -mat_cusparse_* options database keys that select the GPU
   storage format used for SpMV: separate keys for the diagonal block, the
   off-diagonal block, and one key that sets both at once. */
PetscErrorCode MatSetFromOptions_MPIAIJCUSPARSE(PetscOptionItems *PetscOptionsObject,Mat A)
{
  MatCUSPARSEStorageFormat format;
  PetscErrorCode           ierr;
  PetscBool                flg;
  Mat_MPIAIJ               *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE       *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;

  PetscFunctionBegin;
  ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJCUSPARSE options");CHKERRQ(ierr);
  /* Storage-format selection only applies to unfactored matrices */
  if (A->factortype==MAT_FACTOR_NONE) {
    /* Current diagGPUMatFormat is offered as the default enum value */
    ierr = PetscOptionsEnum("-mat_cusparse_mult_diag_storage_format","sets storage format of the diagonal blocks of (mpi)aijcusparse gpu matrices for SpMV",
                            "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)cusparseStruct->diagGPUMatFormat,(PetscEnum*)&format,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_MULT_DIAG,format);CHKERRQ(ierr);
    }
    ierr = PetscOptionsEnum("-mat_cusparse_mult_offdiag_storage_format","sets storage format of the off-diagonal blocks (mpi)aijcusparse gpu matrices for SpMV",
                            "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)cusparseStruct->offdiagGPUMatFormat,(PetscEnum*)&format,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_MULT_OFFDIAG,format);CHKERRQ(ierr);
    }
    /* Combined key overrides both blocks; processed last so it wins when
       given together with the per-block keys above */
    ierr = PetscOptionsEnum("-mat_cusparse_storage_format","sets storage format of the diagonal and off-diagonal blocks (mpi)aijcusparse gpu matrices for SpMV",
                            "MatCUSPARSESetFormat",MatCUSPARSEStorageFormats,(PetscEnum)cusparseStruct->diagGPUMatFormat,(PetscEnum*)&format,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MatCUSPARSESetFormat(A,MAT_CUSPARSE_ALL,format);CHKERRQ(ierr);
    }
  }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
163e057df02SPaul Mullowney 
/* Completes assembly via the base MPIAIJ implementation, then, on the very
   first final assembly, switches the ghosted work vector to a CUDA vector
   type so subsequent scatters/products stay on the GPU. */
PetscErrorCode MatAssemblyEnd_MPIAIJCUSPARSE(Mat A,MatAssemblyType mode)
{
  Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
  if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = VecSetType(mpiaij->lvec,VECSEQCUDA);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
17734d6c7a5SJose E. Roman 
/* Releases the MPIAIJCUSPARSE-specific resources — the shared cuSPARSE handle,
   the optional CUDA stream, and the Mat_MPIAIJCUSPARSE struct itself — then
   removes the composed type-specific methods and delegates to the base MPIAIJ
   destructor. */
PetscErrorCode MatDestroy_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a              = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJCUSPARSE *cusparseStruct = (Mat_MPIAIJCUSPARSE*)a->spptr;
  cudaError_t        err;
  cusparseStatus_t   stat;

  PetscFunctionBegin;
  try {
    /* Detach the shared handle from the sequential blocks BEFORE the handle
       itself is destroyed below */
    if (a->A) { ierr = MatCUSPARSEClearHandle(a->A);CHKERRQ(ierr); }
    if (a->B) { ierr = MatCUSPARSEClearHandle(a->B);CHKERRQ(ierr); }
    stat = cusparseDestroy(cusparseStruct->handle);CHKERRCUSPARSE(stat);
    /* Stream is only created on demand, so it may be 0 */
    if (cusparseStruct->stream) {
      err = cudaStreamDestroy(cusparseStruct->stream);CHKERRCUDA(err);
    }
    delete cusparseStruct;
  } catch(char *ex) {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Mat_MPIAIJCUSPARSE error: %s", ex);
  }
  /* Undo the method compositions installed at conversion time */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatCUSPARSESetFormat_C",NULL);CHKERRQ(ierr);
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
203ca45077fSPaul Mullowney 
/* Converts an MPIAIJ matrix to MPIAIJCUSPARSE: duplicates/copies as dictated
   by 'reuse' (for MAT_INPLACE_MATRIX, *newmat already aliases B — see
   MatCreate_MPIAIJCUSPARSE), switches the default vector type to CUDA,
   allocates the type-specific data with a shared cuSPARSE handle, and installs
   the GPU-aware method table and composed functions. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat B, MatType mtype, MatReuse reuse, Mat* newmat)
{
  PetscErrorCode     ierr;
  Mat_MPIAIJ         *a;
  Mat_MPIAIJCUSPARSE *cusparseStruct;
  cusparseStatus_t   stat;
  Mat                A;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(B,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(B,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  A = *newmat;

  /* Vectors created from this matrix will default to CUDA vectors */
  ierr = PetscFree(A->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&A->defaultvectype);CHKERRQ(ierr);

  a = (Mat_MPIAIJ*)A->data;
  /* First-time conversion: allocate the type-specific data and create the
     cuSPARSE handle that the diagonal/off-diagonal blocks will share */
  if (reuse != MAT_REUSE_MATRIX && !a->spptr) {
    a->spptr = new Mat_MPIAIJCUSPARSE;

    cusparseStruct                      = (Mat_MPIAIJCUSPARSE*)a->spptr;
    cusparseStruct->diagGPUMatFormat    = MAT_CUSPARSE_CSR;
    cusparseStruct->offdiagGPUMatFormat = MAT_CUSPARSE_CSR;
    cusparseStruct->stream              = 0;
    stat = cusparseCreate(&(cusparseStruct->handle));CHKERRCUSPARSE(stat);
  }

  /* Override the operations that benefit from GPU execution */
  A->ops->assemblyend    = MatAssemblyEnd_MPIAIJCUSPARSE;
  A->ops->mult           = MatMult_MPIAIJCUSPARSE;
  A->ops->multadd        = MatMultAdd_MPIAIJCUSPARSE;
  A->ops->multtranspose  = MatMultTranspose_MPIAIJCUSPARSE;
  A->ops->setfromoptions = MatSetFromOptions_MPIAIJCUSPARSE;
  A->ops->destroy        = MatDestroy_MPIAIJCUSPARSE;

  ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJCUSPARSE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJCUSPARSE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatCUSPARSESetFormat_C",MatCUSPARSESetFormat_MPIAIJCUSPARSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2469ae82921SPaul Mullowney 
/* Registered constructor for MATMPIAIJCUSPARSE: creates a plain MPIAIJ matrix
   and converts it in place to the CUSPARSE type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJCUSPARSE(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Ensure the CUDA runtime is initialized before any GPU objects are made */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
  ierr = MatConvert_MPIAIJ_MPIAIJCUSPARSE(A,MATMPIAIJCUSPARSE,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2573338378cSStefano Zampini 
2583f9c0db1SPaul Mullowney /*@
2593f9c0db1SPaul Mullowney    MatCreateAIJCUSPARSE - Creates a sparse matrix in AIJ (compressed row) format
   (the default parallel PETSc format).  This matrix will ultimately be pushed down
2613f9c0db1SPaul Mullowney    to NVidia GPUs and use the CUSPARSE library for calculations. For good matrix
262e057df02SPaul Mullowney    assembly performance the user should preallocate the matrix storage by setting
263e057df02SPaul Mullowney    the parameter nz (or the array nnz).  By setting these parameters accurately,
264e057df02SPaul Mullowney    performance during matrix assembly can be increased by more than a factor of 50.
2659ae82921SPaul Mullowney 
266d083f849SBarry Smith    Collective
267e057df02SPaul Mullowney 
   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it determined from M)
.  n - number of local columns (or PETSC_DECIDE to have it determined from N)
.  M - number of global rows (or PETSC_DETERMINE to have it determined from m)
.  N - number of global columns (or PETSC_DETERMINE to have it determined from n)
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the DIAGONAL portion of the local submatrix
           (possibly different for each row) or NULL
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local submatrix (same for all local rows)
-  o_nnz - array containing the number of nonzeros in the various rows of the OFF-DIAGONAL portion of the local submatrix
           (possibly different for each row) or NULL
275e057df02SPaul Mullowney 
276e057df02SPaul Mullowney    Output Parameter:
277e057df02SPaul Mullowney .  A - the matrix
278e057df02SPaul Mullowney 
279e057df02SPaul Mullowney    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
280e057df02SPaul Mullowney    MatXXXXSetPreallocation() paradigm instead of this routine directly.
281e057df02SPaul Mullowney    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
282e057df02SPaul Mullowney 
283e057df02SPaul Mullowney    Notes:
284e057df02SPaul Mullowney    If nnz is given then nz is ignored
285e057df02SPaul Mullowney 
286e057df02SPaul Mullowney    The AIJ format (also called the Yale sparse matrix format or
287e057df02SPaul Mullowney    compressed row storage), is fully compatible with standard Fortran 77
288e057df02SPaul Mullowney    storage.  That is, the stored row and column indices can begin at
289e057df02SPaul Mullowney    either one (as in Fortran) or zero.  See the users' manual for details.
290e057df02SPaul Mullowney 
291e057df02SPaul Mullowney    Specify the preallocated storage with either nz or nnz (not both).
2920298fd71SBarry Smith    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
293e057df02SPaul Mullowney    allocation.  For large problems you MUST preallocate memory or you
294e057df02SPaul Mullowney    will get TERRIBLE performance, see the users' manual chapter on matrices.
295e057df02SPaul Mullowney 
296e057df02SPaul Mullowney    By default, this format uses inodes (identical nodes) when possible, to
297e057df02SPaul Mullowney    improve numerical efficiency of matrix-vector products and solves. We
298e057df02SPaul Mullowney    search for consecutive rows with the same nonzero structure, thereby
299e057df02SPaul Mullowney    reusing matrix information to achieve increased efficiency.
300e057df02SPaul Mullowney 
301e057df02SPaul Mullowney    Level: intermediate
302e057df02SPaul Mullowney 
303e057df02SPaul Mullowney .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJCUSPARSE, MATAIJCUSPARSE
304e057df02SPaul Mullowney @*/
/* Creates an AIJCUSPARSE matrix: the MPI flavour on parallel communicators,
   the sequential flavour on single-process ones, preallocated accordingly. */
PetscErrorCode  MatCreateAIJCUSPARSE(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* Type depends only on the communicator size; preallocation call differs */
  ierr = MatSetType(*A,(size > 1) ? MATMPIAIJCUSPARSE : MATSEQAIJCUSPARSE);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3239ae82921SPaul Mullowney 
3243ca39a21SBarry Smith /*MC
325e057df02SPaul Mullowney    MATAIJCUSPARSE - MATMPIAIJCUSPARSE = "aijcusparse" = "mpiaijcusparse" - A matrix type to be used for sparse matrices.
326e057df02SPaul Mullowney 
   A matrix type whose data resides on Nvidia GPUs. These matrices can be in either
3282692e278SPaul Mullowney    CSR, ELL, or Hybrid format. The ELL and HYB formats require CUDA 4.2 or later.
3292692e278SPaul Mullowney    All matrix calculations are performed on Nvidia GPUs using the CUSPARSE library.
3309ae82921SPaul Mullowney 
3319ae82921SPaul Mullowney    This matrix type is identical to MATSEQAIJCUSPARSE when constructed with a single process communicator,
3329ae82921SPaul Mullowney    and MATMPIAIJCUSPARSE otherwise.  As a result, for single process communicators,
3339ae82921SPaul Mullowney    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
3349ae82921SPaul Mullowney    for communicators controlling multiple processes.  It is recommended that you call both of
3359ae82921SPaul Mullowney    the above preallocation routines for simplicity.
3369ae82921SPaul Mullowney 
3379ae82921SPaul Mullowney    Options Database Keys:
338e057df02SPaul Mullowney +  -mat_type mpiaijcusparse - sets the matrix type to "mpiaijcusparse" during a call to MatSetFromOptions()
3398468deeeSKarl Rupp .  -mat_cusparse_storage_format csr - sets the storage format of diagonal and off-diagonal matrices during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid).
3408468deeeSKarl Rupp .  -mat_cusparse_mult_diag_storage_format csr - sets the storage format of diagonal matrix during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid).
3418468deeeSKarl Rupp -  -mat_cusparse_mult_offdiag_storage_format csr - sets the storage format of off-diagonal matrix during a call to MatSetFromOptions(). Other options include ell (ellpack) or hyb (hybrid).
3429ae82921SPaul Mullowney 
3439ae82921SPaul Mullowney   Level: beginner
3449ae82921SPaul Mullowney 
3458468deeeSKarl Rupp  .seealso: MatCreateAIJCUSPARSE(), MATSEQAIJCUSPARSE, MatCreateSeqAIJCUSPARSE(), MatCUSPARSESetFormat(), MatCUSPARSEStorageFormat, MatCUSPARSEFormatOperation
M*/
348