xref: /petsc/src/mat/impls/aij/mpi/mpiviennacl/mpiaijviennacl.cxx (revision 8f86e40f90bf3a380478a8a7e464cee7e32009cd)
1*8f86e40fSKarl Rupp #include "petscconf.h"
2*8f86e40fSKarl Rupp #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3*8f86e40fSKarl Rupp 
4*8f86e40fSKarl Rupp #undef __FUNCT__
5*8f86e40fSKarl Rupp #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJViennaCL"
6*8f86e40fSKarl Rupp PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJViennaCL(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
7*8f86e40fSKarl Rupp {
8*8f86e40fSKarl Rupp   Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
9*8f86e40fSKarl Rupp   PetscErrorCode ierr;
10*8f86e40fSKarl Rupp   PetscInt       i;
11*8f86e40fSKarl Rupp 
12*8f86e40fSKarl Rupp   PetscFunctionBegin;
13*8f86e40fSKarl Rupp   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
14*8f86e40fSKarl Rupp   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
15*8f86e40fSKarl Rupp   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
16*8f86e40fSKarl Rupp   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
17*8f86e40fSKarl Rupp 
18*8f86e40fSKarl Rupp   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
19*8f86e40fSKarl Rupp   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
20*8f86e40fSKarl Rupp   if (d_nnz) {
21*8f86e40fSKarl Rupp     for (i=0; i<B->rmap->n; i++) {
22*8f86e40fSKarl Rupp       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
23*8f86e40fSKarl Rupp     }
24*8f86e40fSKarl Rupp   }
25*8f86e40fSKarl Rupp   if (o_nnz) {
26*8f86e40fSKarl Rupp     for (i=0; i<B->rmap->n; i++) {
27*8f86e40fSKarl Rupp       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
28*8f86e40fSKarl Rupp     }
29*8f86e40fSKarl Rupp   }
30*8f86e40fSKarl Rupp   if (!B->preallocated) {
31*8f86e40fSKarl Rupp     /* Explicitly create the two MATSEQAIJVIENNACL matrices. */
32*8f86e40fSKarl Rupp     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
33*8f86e40fSKarl Rupp     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
34*8f86e40fSKarl Rupp     ierr = MatSetType(b->A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
35*8f86e40fSKarl Rupp     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
36*8f86e40fSKarl Rupp     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
37*8f86e40fSKarl Rupp     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
38*8f86e40fSKarl Rupp     ierr = MatSetType(b->B,MATSEQAIJVIENNACL);CHKERRQ(ierr);
39*8f86e40fSKarl Rupp     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
40*8f86e40fSKarl Rupp   }
41*8f86e40fSKarl Rupp   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
42*8f86e40fSKarl Rupp   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
43*8f86e40fSKarl Rupp   B->preallocated = PETSC_TRUE;
44*8f86e40fSKarl Rupp   PetscFunctionReturn(0);
45*8f86e40fSKarl Rupp }
46*8f86e40fSKarl Rupp 
47*8f86e40fSKarl Rupp #undef __FUNCT__
48*8f86e40fSKarl Rupp #define __FUNCT__ "MatGetVecs_MPIAIJViennaCL"
49*8f86e40fSKarl Rupp PetscErrorCode  MatGetVecs_MPIAIJViennaCL(Mat mat,Vec *right,Vec *left)
50*8f86e40fSKarl Rupp {
51*8f86e40fSKarl Rupp   PetscErrorCode ierr;
52*8f86e40fSKarl Rupp 
53*8f86e40fSKarl Rupp   PetscFunctionBegin;
54*8f86e40fSKarl Rupp   if (right) {
55*8f86e40fSKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr);
56*8f86e40fSKarl Rupp     ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
57*8f86e40fSKarl Rupp     ierr = VecSetBlockSize(*right,mat->rmap->bs);CHKERRQ(ierr);
58*8f86e40fSKarl Rupp     ierr = VecSetType(*right,VECVIENNACL);CHKERRQ(ierr);
59*8f86e40fSKarl Rupp     ierr = VecSetLayout(*right,mat->cmap);CHKERRQ(ierr);
60*8f86e40fSKarl Rupp   }
61*8f86e40fSKarl Rupp   if (left) {
62*8f86e40fSKarl Rupp     ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr);
63*8f86e40fSKarl Rupp     ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr);
64*8f86e40fSKarl Rupp     ierr = VecSetBlockSize(*left,mat->rmap->bs);CHKERRQ(ierr);
65*8f86e40fSKarl Rupp     ierr = VecSetType(*left,VECVIENNACL);CHKERRQ(ierr);
66*8f86e40fSKarl Rupp     ierr = VecSetLayout(*left,mat->rmap);CHKERRQ(ierr);
67*8f86e40fSKarl Rupp   }
68*8f86e40fSKarl Rupp   PetscFunctionReturn(0);
69*8f86e40fSKarl Rupp }
70*8f86e40fSKarl Rupp 
71*8f86e40fSKarl Rupp 
72*8f86e40fSKarl Rupp #undef __FUNCT__
73*8f86e40fSKarl Rupp #define __FUNCT__ "MatDestroy_MPIAIJViennaCL"
74*8f86e40fSKarl Rupp PetscErrorCode MatDestroy_MPIAIJViennaCL(Mat A)
75*8f86e40fSKarl Rupp {
76*8f86e40fSKarl Rupp   PetscErrorCode ierr;
77*8f86e40fSKarl Rupp 
78*8f86e40fSKarl Rupp   PetscFunctionBegin;
79*8f86e40fSKarl Rupp   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
80*8f86e40fSKarl Rupp   PetscFunctionReturn(0);
81*8f86e40fSKarl Rupp }
82*8f86e40fSKarl Rupp 
83*8f86e40fSKarl Rupp #undef __FUNCT__
84*8f86e40fSKarl Rupp #define __FUNCT__ "MatCreate_MPIAIJViennaCL"
85*8f86e40fSKarl Rupp PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJViennaCL(Mat A)
86*8f86e40fSKarl Rupp {
87*8f86e40fSKarl Rupp   PetscErrorCode ierr;
88*8f86e40fSKarl Rupp 
89*8f86e40fSKarl Rupp   PetscFunctionBegin;
90*8f86e40fSKarl Rupp   ierr = MatCreate_MPIAIJ(A);CHKERRQ(ierr);
91*8f86e40fSKarl Rupp   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C","MatMPIAIJSetPreallocation_MPIAIJViennaCL",MatMPIAIJSetPreallocation_MPIAIJViennaCL);CHKERRQ(ierr);
92*8f86e40fSKarl Rupp   A->ops->getvecs        = MatGetVecs_MPIAIJViennaCL;
93*8f86e40fSKarl Rupp 
94*8f86e40fSKarl Rupp   ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIAIJVIENNACL);CHKERRQ(ierr);
95*8f86e40fSKarl Rupp   PetscFunctionReturn(0);
96*8f86e40fSKarl Rupp }
97*8f86e40fSKarl Rupp 
98*8f86e40fSKarl Rupp 
99*8f86e40fSKarl Rupp /*@
100*8f86e40fSKarl Rupp    MatCreateAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format
101*8f86e40fSKarl Rupp    (the default parallel PETSc format).  This matrix will ultimately pushed down
102*8f86e40fSKarl Rupp    to GPUs and use the ViennaCL library for calculations. For good matrix
103*8f86e40fSKarl Rupp    assembly performance the user should preallocate the matrix storage by setting
104*8f86e40fSKarl Rupp    the parameter nz (or the array nnz).  By setting these parameters accurately,
105*8f86e40fSKarl Rupp    performance during matrix assembly can be increased substantially.
106*8f86e40fSKarl Rupp 
107*8f86e40fSKarl Rupp 
108*8f86e40fSKarl Rupp    Collective on MPI_Comm
109*8f86e40fSKarl Rupp 
110*8f86e40fSKarl Rupp    Input Parameters:
111*8f86e40fSKarl Rupp +  comm - MPI communicator, set to PETSC_COMM_SELF
112*8f86e40fSKarl Rupp .  m - number of rows
113*8f86e40fSKarl Rupp .  n - number of columns
114*8f86e40fSKarl Rupp .  nz - number of nonzeros per row (same for all rows)
115*8f86e40fSKarl Rupp -  nnz - array containing the number of nonzeros in the various rows
116*8f86e40fSKarl Rupp          (possibly different for each row) or NULL
117*8f86e40fSKarl Rupp 
118*8f86e40fSKarl Rupp    Output Parameter:
119*8f86e40fSKarl Rupp .  A - the matrix
120*8f86e40fSKarl Rupp 
121*8f86e40fSKarl Rupp    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
122*8f86e40fSKarl Rupp    MatXXXXSetPreallocation() paradigm instead of this routine directly.
123*8f86e40fSKarl Rupp    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
124*8f86e40fSKarl Rupp 
125*8f86e40fSKarl Rupp    Notes:
126*8f86e40fSKarl Rupp    If nnz is given then nz is ignored
127*8f86e40fSKarl Rupp 
128*8f86e40fSKarl Rupp    The AIJ format (also called the Yale sparse matrix format or
129*8f86e40fSKarl Rupp    compressed row storage), is fully compatible with standard Fortran 77
130*8f86e40fSKarl Rupp    storage.  That is, the stored row and column indices can begin at
131*8f86e40fSKarl Rupp    either one (as in Fortran) or zero.  See the users' manual for details.
132*8f86e40fSKarl Rupp 
133*8f86e40fSKarl Rupp    Specify the preallocated storage with either nz or nnz (not both).
134*8f86e40fSKarl Rupp    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
135*8f86e40fSKarl Rupp    allocation.  For large problems you MUST preallocate memory or you
136*8f86e40fSKarl Rupp    will get TERRIBLE performance, see the users' manual chapter on matrices.
137*8f86e40fSKarl Rupp 
138*8f86e40fSKarl Rupp    By default, this format uses inodes (identical nodes) when possible, to
139*8f86e40fSKarl Rupp    improve numerical efficiency of matrix-vector products and solves. We
140*8f86e40fSKarl Rupp    search for consecutive rows with the same nonzero structure, thereby
141*8f86e40fSKarl Rupp    reusing matrix information to achieve increased efficiency.
142*8f86e40fSKarl Rupp 
143*8f86e40fSKarl Rupp    Level: intermediate
144*8f86e40fSKarl Rupp 
145*8f86e40fSKarl Rupp .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATMPIAIJVIENNACL, MATAIJVIENNACL
146*8f86e40fSKarl Rupp @*/
147*8f86e40fSKarl Rupp #undef __FUNCT__
148*8f86e40fSKarl Rupp #define __FUNCT__ "MatCreateAIJViennaCL"
149*8f86e40fSKarl Rupp PetscErrorCode  MatCreateAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
150*8f86e40fSKarl Rupp {
151*8f86e40fSKarl Rupp   PetscErrorCode ierr;
152*8f86e40fSKarl Rupp   PetscMPIInt    size;
153*8f86e40fSKarl Rupp 
154*8f86e40fSKarl Rupp   PetscFunctionBegin;
155*8f86e40fSKarl Rupp   ierr = MatCreate(comm,A);CHKERRQ(ierr);
156*8f86e40fSKarl Rupp   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
157*8f86e40fSKarl Rupp   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
158*8f86e40fSKarl Rupp   if (size > 1) {
159*8f86e40fSKarl Rupp     ierr = MatSetType(*A,MATMPIAIJVIENNACL);CHKERRQ(ierr);
160*8f86e40fSKarl Rupp     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
161*8f86e40fSKarl Rupp   } else {
162*8f86e40fSKarl Rupp     ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr);
163*8f86e40fSKarl Rupp     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
164*8f86e40fSKarl Rupp   }
165*8f86e40fSKarl Rupp   PetscFunctionReturn(0);
166*8f86e40fSKarl Rupp }
167*8f86e40fSKarl Rupp 
168*8f86e40fSKarl Rupp /*M
169*8f86e40fSKarl Rupp    MATAIJVIENNACL - MATMPIAIJVIENNACL= "aijviennacl" = "mpiaijviennacl" - A matrix type to be used for sparse matrices.
170*8f86e40fSKarl Rupp 
171*8f86e40fSKarl Rupp    A matrix type (CSR format) whose data resides on GPUs.
172*8f86e40fSKarl Rupp    All matrix calculations are performed using the ViennaCL library.
173*8f86e40fSKarl Rupp 
174*8f86e40fSKarl Rupp    This matrix type is identical to MATSEQAIJVIENNACL when constructed with a single process communicator,
175*8f86e40fSKarl Rupp    and MATMPIAIJVIENNACL otherwise.  As a result, for single process communicators,
176*8f86e40fSKarl Rupp    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
177*8f86e40fSKarl Rupp    for communicators controlling multiple processes.  It is recommended that you call both of
178*8f86e40fSKarl Rupp    the above preallocation routines for simplicity.
179*8f86e40fSKarl Rupp 
180*8f86e40fSKarl Rupp    Options Database Keys:
181*8f86e40fSKarl Rupp +  -mat_type mpiaijviennacl - sets the matrix type to "mpiaijviennacl" during a call to MatSetFromOptions()
182*8f86e40fSKarl Rupp 
183*8f86e40fSKarl Rupp   Level: beginner
184*8f86e40fSKarl Rupp 
185*8f86e40fSKarl Rupp  .seealso: MatCreateAIJViennaCL(), MATSEQAIJVIENNACL, MatCreateSeqAIJVIENNACL()
186*8f86e40fSKarl Rupp M*/
187*8f86e40fSKarl Rupp 
188