1 2 3 /* 4 Defines the basic matrix operations for the AIJ (compressed row) 5 matrix storage format. 6 */ 7 8 #include <petscconf.h> 9 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 10 #include <petscbt.h> 11 #include <../src/vec/vec/impls/dvecimpl.h> 12 #include <petsc-private/vecimpl.h> 13 14 #include <../src/mat/impls/aij/seq/seqviennacl/viennaclmatimpl.h> 15 16 17 #include <algorithm> 18 #include <vector> 19 #include <string> 20 21 #include "viennacl/linalg/prod.hpp" 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "MatViennaCLCopyToGPU" 25 PetscErrorCode MatViennaCLCopyToGPU(Mat A) 26 { 27 28 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 29 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 30 PetscInt *ii; 31 PetscErrorCode ierr; 32 33 34 PetscFunctionBegin; 35 if (A->rmap->n > 0 && A->cmap->n > 0) { //some OpenCL SDKs have issues with buffers of size 0 36 if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED || A->valid_GPU_matrix == PETSC_VIENNACL_CPU) { 37 ierr = PetscLogEventBegin(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 38 39 try { 40 ierr = PetscObjectSetFromOptions_ViennaCL((PetscObject)A);CHKERRQ(ierr); /* Allows to set device type before allocating any objects */ 41 viennaclstruct->mat = new ViennaCLAIJMatrix(); 42 if (a->compressedrow.use) { 43 ii = a->compressedrow.i; 44 45 viennaclstruct->mat->set(ii, a->j, a->a, A->rmap->n, A->cmap->n, a->nz); 46 47 // TODO: Either convert to full CSR (inefficient), or hold row indices in temporary vector (requires additional bookkeeping for matrix-vector multiplications) 48 // Cannot be reasonably supported in ViennaCL 1.4.x (custom kernels required), hence postponing until there is support for compressed CSR 49 50 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported"); 51 } else { 52 53 // Since PetscInt is in general different from cl_uint, we have to convert: 54 viennacl::backend::mem_handle dummy; 55 56 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(dummy, A->rmap->n+1); 57 for (PetscInt i=0; i<=A->rmap->n; ++i) 58 row_buffer.set(i, (a->i)[i]); 59 60 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(dummy, a->nz); 61 for (PetscInt i=0; i<a->nz; ++i) 62 col_buffer.set(i, (a->j)[i]); 63 64 viennaclstruct->mat->set(row_buffer.get(), col_buffer.get(), a->a, A->rmap->n, A->cmap->n, a->nz); 65 } 66 ViennaCLWaitForGPU(); 67 } catch(std::exception const & ex) { 68 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 69 } 70 71 A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 72 73 ierr = PetscLogEventEnd(MAT_ViennaCLCopyToGPU,A,0,0,0);CHKERRQ(ierr); 74 } 75 } 76 PetscFunctionReturn(0); 77 } 78 79 #undef __FUNCT__ 80 #define __FUNCT__ "MatViennaCLCopyFromGPU" 81 PetscErrorCode MatViennaCLCopyFromGPU(Mat A, const ViennaCLAIJMatrix *Agpu) 82 { 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 84 PetscInt m = A->rmap->n; 85 PetscErrorCode ierr; 86 87 88 PetscFunctionBegin; 89 if (A->valid_GPU_matrix == PETSC_VIENNACL_UNALLOCATED) { 90 try { 91 if (a->compressedrow.use) { 92 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL: Cannot handle row compression for GPU matrices"); 93 } else { 94 95 if ((PetscInt)Agpu->size1() != m) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_ARG_SIZ, "GPU matrix has %d rows, should be %d", Agpu->size1(), m); 96 a->nz = Agpu->nnz(); 97 a->maxnz = a->nz; /* Since we allocate exactly the right amount */ 98 A->preallocated = PETSC_TRUE; 99 if (a->singlemalloc) { 100 if (a->a) {ierr = PetscFree3(a->a,a->j,a->i);CHKERRQ(ierr);} 101 } else { 102 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 103 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 104 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 105 } 106 ierr = PetscMalloc3(a->nz,&a->a,a->nz,&a->j,m+1,&a->i);CHKERRQ(ierr); 107 ierr = PetscLogObjectMemory((PetscObject)A, a->nz*(sizeof(PetscScalar)+sizeof(PetscInt))+(m+1)*sizeof(PetscInt));CHKERRQ(ierr); 108 109 a->singlemalloc = PETSC_TRUE; 110 111 /* Setup row lengths */ 112 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 113 ierr = PetscMalloc2(m,&a->imax,m,&a->ilen);CHKERRQ(ierr); 114 ierr = PetscLogObjectMemory((PetscObject)A, 2*m*sizeof(PetscInt));CHKERRQ(ierr); 115 116 /* Copy data back from GPU */ 117 viennacl::backend::typesafe_host_array<unsigned int> row_buffer; row_buffer.raw_resize(Agpu->handle1(), Agpu->size1() + 1); 118 119 // copy row array 120 viennacl::backend::memory_read(Agpu->handle1(), 0, row_buffer.raw_size(), row_buffer.get()); 121 (a->i)[0] = row_buffer[0]; 122 for (PetscInt i = 0; i < (PetscInt)Agpu->size1(); ++i) { 123 (a->i)[i+1] = row_buffer[i+1]; 124 a->imax[i] = a->ilen[i] = a->i[i+1] - a->i[i]; //Set imax[] and ilen[] arrays at the same time as i[] for better cache reuse 125 } 126 127 // copy column indices 128 viennacl::backend::typesafe_host_array<unsigned int> col_buffer; col_buffer.raw_resize(Agpu->handle2(), Agpu->nnz()); 129 viennacl::backend::memory_read(Agpu->handle2(), 0, col_buffer.raw_size(), col_buffer.get()); 130 for (PetscInt i=0; i < (PetscInt)Agpu->nnz(); ++i) 131 (a->j)[i] = col_buffer[i]; 132 133 // copy nonzero entries directly to destination (no conversion required) 134 viennacl::backend::memory_read(Agpu->handle(), 0, sizeof(PetscScalar)*Agpu->nnz(), a->a); 135 136 ViennaCLWaitForGPU(); 137 /* TODO: Once a->diag is moved out of MatAssemblyEnd(), invalidate it here. */ 138 } 139 } catch(std::exception const & ex) { 140 SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what()); 141 } 142 143 /* This assembly prevents resetting the flag to PETSC_VIENNACL_CPU and recopying */ 144 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 145 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 146 147 A->valid_GPU_matrix = PETSC_VIENNACL_BOTH; 148 } else { 149 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "ViennaCL error: Only valid for unallocated GPU matrices"); 150 } 151 PetscFunctionReturn(0); 152 } 153 154 #undef __FUNCT__ 155 #define __FUNCT__ "MatGetVecs_SeqAIJViennaCL" 156 PetscErrorCode MatGetVecs_SeqAIJViennaCL(Mat mat, Vec *right, Vec *left) 157 { 158 PetscErrorCode ierr; 159 PetscInt rbs,cbs; 160 161 PetscFunctionBegin; 162 ierr = MatGetBlockSizes(mat,&rbs,&cbs);CHKERRQ(ierr); 163 if (right) { 164 ierr = VecCreate(PetscObjectComm((PetscObject)mat),right);CHKERRQ(ierr); 165 ierr = VecSetSizes(*right,mat->cmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 166 ierr = VecSetBlockSize(*right,cbs);CHKERRQ(ierr); 167 ierr = VecSetType(*right,VECSEQVIENNACL);CHKERRQ(ierr); 168 ierr = PetscLayoutReference(mat->cmap,&(*right)->map);CHKERRQ(ierr); 169 } 170 if (left) { 171 ierr = VecCreate(PetscObjectComm((PetscObject)mat),left);CHKERRQ(ierr); 172 ierr = VecSetSizes(*left,mat->rmap->n,PETSC_DETERMINE);CHKERRQ(ierr); 173 ierr = VecSetBlockSize(*left,rbs);CHKERRQ(ierr); 174 ierr = VecSetType(*left,VECSEQVIENNACL);CHKERRQ(ierr); 175 ierr = PetscLayoutReference(mat->rmap,&(*left)->map);CHKERRQ(ierr); 176 } 177 PetscFunctionReturn(0); 178 } 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_SeqAIJViennaCL" 182 PetscErrorCode MatMult_SeqAIJViennaCL(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 185 PetscErrorCode ierr; 186 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 187 const ViennaCLVector *xgpu=NULL; 188 ViennaCLVector *ygpu=NULL; 189 190 PetscFunctionBegin; 191 if (A->rmap->n > 0 && A->cmap->n > 0) { 192 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 193 ierr = VecViennaCLGetArrayWrite(yy,&ygpu);CHKERRQ(ierr); 194 try { 195 *ygpu = viennacl::linalg::prod(*viennaclstruct->mat,*xgpu); 196 ViennaCLWaitForGPU(); 197 } catch (std::exception const & ex) { 198 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 199 } 200 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 201 ierr = VecViennaCLRestoreArrayWrite(yy,&ygpu);CHKERRQ(ierr); 202 ierr = PetscLogFlops(2.0*a->nz - viennaclstruct->mat->nnz());CHKERRQ(ierr); 203 } 204 PetscFunctionReturn(0); 205 } 206 207 208 209 #undef __FUNCT__ 210 #define __FUNCT__ "MatMultAdd_SeqAIJViennaCL" 211 PetscErrorCode MatMultAdd_SeqAIJViennaCL(Mat A,Vec xx,Vec yy,Vec zz) 212 { 213 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 214 PetscErrorCode ierr; 215 Mat_SeqAIJViennaCL *viennaclstruct = (Mat_SeqAIJViennaCL*)A->spptr; 216 const ViennaCLVector *xgpu=NULL,*ygpu=NULL; 217 ViennaCLVector *zgpu=NULL; 218 219 PetscFunctionBegin; 220 if (A->rmap->n > 0 && A->cmap->n > 0) { 221 try { 222 ierr = VecViennaCLGetArrayRead(xx,&xgpu);CHKERRQ(ierr); 223 ierr = VecViennaCLGetArrayRead(yy,&ygpu);CHKERRQ(ierr); 224 ierr = VecViennaCLGetArrayWrite(zz,&zgpu);CHKERRQ(ierr); 225 226 if (a->compressedrow.use) { 227 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: Compressed CSR (only nonzero rows) not yet supported"); 228 } else { 229 if (zz == xx || zz == yy) { //temporary required 230 ViennaCLVector temp = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 231 *zgpu = *ygpu + temp; 232 ViennaCLWaitForGPU(); 233 } else { 234 *zgpu = viennacl::linalg::prod(*viennaclstruct->mat, *xgpu); 235 *zgpu += *ygpu; 236 ViennaCLWaitForGPU(); 237 } 238 } 239 240 ierr = VecViennaCLRestoreArrayRead(xx,&xgpu);CHKERRQ(ierr); 241 ierr = VecViennaCLRestoreArrayRead(yy,&ygpu);CHKERRQ(ierr); 242 ierr = VecViennaCLRestoreArrayWrite(zz,&zgpu);CHKERRQ(ierr); 243 244 } catch(std::exception const & ex) { 245 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 246 } 247 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 248 } 249 PetscFunctionReturn(0); 250 } 251 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatAssemblyEnd_SeqAIJViennaCL" 254 PetscErrorCode MatAssemblyEnd_SeqAIJViennaCL(Mat A,MatAssemblyType mode) 255 { 256 PetscErrorCode ierr; 257 258 PetscFunctionBegin; 259 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 260 ierr = MatViennaCLCopyToGPU(A);CHKERRQ(ierr); 261 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 262 A->ops->mult = MatMult_SeqAIJViennaCL; 263 A->ops->multadd = MatMultAdd_SeqAIJViennaCL; 264 PetscFunctionReturn(0); 265 } 266 267 /* --------------------------------------------------------------------------------*/ 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatCreateSeqAIJViennaCL" 270 /*@ 271 MatCreateSeqAIJViennaCL - Creates a sparse matrix in AIJ (compressed row) format 272 (the default parallel PETSc format). This matrix will ultimately be pushed down 273 to GPUs and use the ViennaCL library for calculations. For good matrix 274 assembly performance the user should preallocate the matrix storage by setting 275 the parameter nz (or the array nnz). By setting these parameters accurately, 276 performance during matrix assembly can be increased substantially. 277 278 279 Collective on MPI_Comm 280 281 Input Parameters: 282 + comm - MPI communicator, set to PETSC_COMM_SELF 283 . m - number of rows 284 . n - number of columns 285 . nz - number of nonzeros per row (same for all rows) 286 - nnz - array containing the number of nonzeros in the various rows 287 (possibly different for each row) or NULL 288 289 Output Parameter: 290 . A - the matrix 291 292 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 293 MatXXXXSetPreallocation() paradigm instead of this routine directly. 294 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 295 296 Notes: 297 If nnz is given then nz is ignored 298 299 The AIJ format (also called the Yale sparse matrix format or 300 compressed row storage), is fully compatible with standard Fortran 77 301 storage. That is, the stored row and column indices can begin at 302 either one (as in Fortran) or zero. See the users' manual for details. 303 304 Specify the preallocated storage with either nz or nnz (not both). 305 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 306 allocation. For large problems you MUST preallocate memory or you 307 will get TERRIBLE performance, see the users' manual chapter on matrices. 308 309 Level: intermediate 310 311 .seealso: MatCreate(), MatCreateAIJ(), MatCreateAIJCUSP(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ() 312 313 @*/ 314 PetscErrorCode MatCreateSeqAIJViennaCL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 ierr = MatCreate(comm,A);CHKERRQ(ierr); 320 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 321 ierr = MatSetType(*A,MATSEQAIJVIENNACL);CHKERRQ(ierr); 322 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 323 PetscFunctionReturn(0); 324 } 325 326 327 #undef __FUNCT__ 328 #define __FUNCT__ "MatDestroy_SeqAIJViennaCL" 329 PetscErrorCode MatDestroy_SeqAIJViennaCL(Mat A) 330 { 331 PetscErrorCode ierr; 332 Mat_SeqAIJViennaCL *viennaclcontainer = (Mat_SeqAIJViennaCL*)A->spptr; 333 334 PetscFunctionBegin; 335 try { 336 if (A->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) delete (ViennaCLAIJMatrix*)(viennaclcontainer->mat); 337 delete viennaclcontainer; 338 A->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 339 } catch(std::exception const & ex) { 340 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"ViennaCL error: %s", ex.what()); 341 } 342 /* this next line is because MatDestroy tries to PetscFree spptr if it is not zero, and PetscFree only works if the memory was allocated with PetscNew or PetscMalloc, which don't call the constructor */ 343 A->spptr = 0; 344 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 349 #undef __FUNCT__ 350 #define __FUNCT__ "MatCreate_SeqAIJViennaCL" 351 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJViennaCL(Mat B) 352 { 353 PetscErrorCode ierr; 354 Mat_SeqAIJ *aij; 355 356 PetscFunctionBegin; 357 ierr = MatCreate_SeqAIJ(B);CHKERRQ(ierr); 358 aij = (Mat_SeqAIJ*)B->data; 359 aij->inode.use = PETSC_FALSE; 360 B->ops->mult = MatMult_SeqAIJViennaCL; 361 B->ops->multadd = MatMultAdd_SeqAIJViennaCL; 362 B->spptr = new Mat_SeqAIJViennaCL(); 363 364 ((Mat_SeqAIJViennaCL*)B->spptr)->mat = 0; 365 366 B->ops->assemblyend = MatAssemblyEnd_SeqAIJViennaCL; 367 B->ops->destroy = MatDestroy_SeqAIJViennaCL; 368 B->ops->getvecs = MatGetVecs_SeqAIJViennaCL; 369 370 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJVIENNACL);CHKERRQ(ierr); 371 372 B->valid_GPU_matrix = PETSC_VIENNACL_UNALLOCATED; 373 PetscFunctionReturn(0); 374 } 375 376 377 /*M 378 MATSEQAIJVIENNACL - MATAIJVIENNACL = "aijviennacl" = "seqaijviennacl" - A matrix type to be used for sparse matrices. 379 380 A matrix type type whose data resides on GPUs. These matrices are in CSR format by 381 default. All matrix calculations are performed using the ViennaCL library. 382 383 Options Database Keys: 384 + -mat_type aijviennacl - sets the matrix type to "seqaijviennacl" during a call to MatSetFromOptions() 385 . -mat_viennacl_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 386 - -mat_viennacl_mult_storage_format csr - sets the storage format of matrices for MatMult during a call to MatSetFromOptions(). 387 388 Level: beginner 389 390 .seealso: MatCreateSeqAIJViennaCL(), MATAIJVIENNACL, MatCreateAIJViennaCL() 391 M*/ 392 393