xref: /petsc/src/mat/impls/aij/mpi/kokkos/mpiaijkok.kokkos.cxx (revision d0295fc027abbea29f13fa82c19cf92da8e9ba99)
1 #include <petscconf.h>
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3 
4 PetscErrorCode MatAssemblyEnd_MPIAIJKokkos(Mat A,MatAssemblyType mode)
5 {
6   PetscErrorCode ierr;
7   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)A->data;
8 
9   PetscFunctionBegin;
10   ierr = MatAssemblyEnd_MPIAIJ(A,mode);CHKERRQ(ierr);
11   if (!A->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
12     ierr = VecSetType(mpiaij->lvec,VECSEQKOKKOS);CHKERRQ(ierr);
13   }
14   PetscFunctionReturn(0);
15 }
16 
17 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJKokkos(Mat mat,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
18 {
19   PetscErrorCode     ierr;
20   PetscInt           i;
21   Mat_MPIAIJ         *mpiaij = (Mat_MPIAIJ*)mat->data;
22 
23 
24   PetscFunctionBegin;
25   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
26   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
27   if (d_nnz) {
28     for (i=0; i<mat->rmap->n; i++) {
29       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
30     }
31   }
32   if (o_nnz) {
33     for (i=0; i<mat->rmap->n; i++) {
34       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
35     }
36   }
37   if (!mat->preallocated) {
38     /* Explicitly create 2 MATSEQAIJKOKKOS matrices. */
39     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->A);CHKERRQ(ierr);
40     ierr = MatSetSizes(mpiaij->A,mat->rmap->n,mat->cmap->n,mat->rmap->n,mat->cmap->n);CHKERRQ(ierr);
41     ierr = MatSetType(mpiaij->A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
42     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->A);CHKERRQ(ierr);
43     ierr = MatCreate(PETSC_COMM_SELF,&mpiaij->B);CHKERRQ(ierr);
44     ierr = MatSetSizes(mpiaij->B,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
45     ierr = MatSetType(mpiaij->B,MATSEQAIJKOKKOS);CHKERRQ(ierr);
46     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->B);CHKERRQ(ierr);
47   }
48   ierr = MatSeqAIJSetPreallocation(mpiaij->A,d_nz,d_nnz);CHKERRQ(ierr);
49   ierr = MatSeqAIJSetPreallocation(mpiaij->B,o_nz,o_nnz);CHKERRQ(ierr);
50 
51   mat->preallocated = PETSC_TRUE;
52   PetscFunctionReturn(0);
53 }
54 
55 PetscErrorCode MatMult_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
56 {
57   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
58   PetscErrorCode ierr;
59   PetscInt       nt;
60 
61   PetscFunctionBegin;
62   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
63   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
64   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
65   ierr = (*mpiaij->A->ops->mult)(mpiaij->A,xx,yy);CHKERRQ(ierr);
66   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
67   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,yy,yy);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 PetscErrorCode MatMultAdd_MPIAIJKokkos(Mat mat,Vec xx,Vec yy,Vec zz)
72 {
73   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
74   PetscErrorCode ierr;
75   PetscInt       nt;
76 
77   PetscFunctionBegin;
78   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
79   if (nt != mat->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->cmap->n,nt);
80   ierr = VecScatterBegin(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
81   ierr = (*mpiaij->A->ops->multadd)(mpiaij->A,xx,yy,zz);CHKERRQ(ierr);
82   ierr = VecScatterEnd(mpiaij->Mvctx,xx,mpiaij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
83   ierr = (*mpiaij->B->ops->multadd)(mpiaij->B,mpiaij->lvec,zz,zz);CHKERRQ(ierr);
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatMultTranspose_MPIAIJKokkos(Mat mat,Vec xx,Vec yy)
88 {
89   Mat_MPIAIJ     *mpiaij = (Mat_MPIAIJ*)mat->data;
90   PetscErrorCode ierr;
91   PetscInt       nt;
92 
93   PetscFunctionBegin;
94   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
95   if (nt != mat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of mat (%D) and xx (%D)",mat->rmap->n,nt);
96   ierr = (*mpiaij->B->ops->multtranspose)(mpiaij->B,xx,mpiaij->lvec);CHKERRQ(ierr);
97   ierr = (*mpiaij->A->ops->multtranspose)(mpiaij->A,xx,yy);CHKERRQ(ierr);
98   ierr = VecScatterBegin(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
99   ierr = VecScatterEnd(mpiaij->Mvctx,mpiaij->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat)
104 {
105   PetscErrorCode     ierr;
106   Mat                B;
107 
108   PetscFunctionBegin;
109   if (reuse == MAT_INITIAL_MATRIX) {
110     ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
111   } else if (reuse == MAT_REUSE_MATRIX) {
112     ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
113   }
114   B = *newmat;
115 
116   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
117   ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr);
118   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJKOKKOS);CHKERRQ(ierr);
119 
120   B->ops->assemblyend    = MatAssemblyEnd_MPIAIJKokkos;
121   B->ops->mult           = MatMult_MPIAIJKokkos;
122   B->ops->multadd        = MatMultAdd_MPIAIJKokkos;
123   B->ops->multtranspose  = MatMultTranspose_MPIAIJKokkos;
124 
125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJKokkos);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJKokkos(Mat A)
130 {
131   PetscErrorCode ierr;
132 
133   PetscFunctionBegin;
134   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr);
135   ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
136   ierr = MatConvert_MPIAIJ_MPIAIJKokkos(A,MATMPIAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141    MatCreateAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format
142    (the default parallel PETSc format).  This matrix will ultimately pushed down
143    to Kokkos for calculations. For good matrix
144    assembly performance the user should preallocate the matrix storage by setting
145    the parameter nz (or the array nnz).  By setting these parameters accurately,
146    performance during matrix assembly can be increased by more than a factor of 50.
147 
148    Collective
149 
150    Input Parameters:
151 +  comm - MPI communicator, set to PETSC_COMM_SELF
152 .  m - number of rows
153 .  n - number of columns
154 .  nz - number of nonzeros per row (same for all rows)
155 -  nnz - array containing the number of nonzeros in the various rows
156          (possibly different for each row) or NULL
157 
158    Output Parameter:
159 .  A - the matrix
160 
161    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
162    MatXXXXSetPreallocation() paradigm instead of this routine directly.
163    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
164 
165    Notes:
166    If nnz is given then nz is ignored
167 
168    The AIJ format (also called the Yale sparse matrix format or
169    compressed row storage), is fully compatible with standard Fortran 77
170    storage.  That is, the stored row and column indices can begin at
171    either one (as in Fortran) or zero.  See the users' manual for details.
172 
173    Specify the preallocated storage with either nz or nnz (not both).
174    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
175    allocation.  For large problems you MUST preallocate memory or you
176    will get TERRIBLE performance, see the users' manual chapter on matrices.
177 
178    By default, this format uses inodes (identical nodes) when possible, to
179    improve numerical efficiency of matrix-vector products and solves. We
180    search for consecutive rows with the same nonzero structure, thereby
181    reusing matrix information to achieve increased efficiency.
182 
183    Level: intermediate
184 
185 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJKOKKOS, MATAIJKokkos
186 @*/
187 PetscErrorCode  MatCreateAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
188 {
189   PetscErrorCode ierr;
190   PetscMPIInt    size;
191 
192   PetscFunctionBegin;
193   ierr = MatCreate(comm,A);CHKERRQ(ierr);
194   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
196   if (size > 1) {
197     ierr = MatSetType(*A,MATMPIAIJKOKKOS);CHKERRQ(ierr);
198     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
199   } else {
200     ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr);
201     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
202   }
203   PetscFunctionReturn(0);
204 }
205 
206