1 #include "petsc/private/petscimpl.h" 2 #include <petscsystypes.h> 3 #include <petscerror.h> 4 #include <petscveckokkos.hpp> 5 6 #include <Kokkos_Core.hpp> 7 #include <KokkosSparse_CrsMatrix.hpp> 8 #include <KokkosSparse_spmv.hpp> 9 #include <KokkosSparse_spmv.hpp> 10 #include <../src/mat/impls/aij/seq/aij.h> 11 12 #include <../src/mat/impls/aij/seq/kokkos/aijkokkosimpl.hpp> 13 #include <petscmat.h> 14 15 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat); /* Forward declaration */ 16 17 static PetscErrorCode MatAssemblyEnd_SeqAIJKokkos(Mat A,MatAssemblyType mode) 18 { 19 PetscErrorCode ierr; 20 Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 21 22 PetscFunctionBegin; 23 ierr = MatAssemblyEnd_SeqAIJ(A,mode);CHKERRQ(ierr); 24 if (aijkok && aijkok->device_mat_d.data()) { 25 A->offloadmask = PETSC_OFFLOAD_GPU; // in GPU mode, no going back. MatSetValues checks this 26 } 27 /* Don't build (or update) the Mat_SeqAIJKokkos struct. We delay it to the very last moment until we need it. */ 28 PetscFunctionReturn(0); 29 } 30 31 static PetscErrorCode MatSeqAIJKokkosSyncDevice(Mat A) 32 { 33 Mat_SeqAIJ *aijseq = (Mat_SeqAIJ*)A->data; 34 Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 35 PetscInt nrows = A->rmap->n,ncols = A->cmap->n,nnz = aijseq->nz; 36 37 PetscFunctionBegin; 38 /* If aijkok is not built yet or the nonzero pattern on CPU has changed, build aijkok from the scratch */ 39 if (!aijkok || aijkok->nonzerostate != A->nonzerostate) { 40 delete aijkok; 41 aijkok = new Mat_SeqAIJKokkos(nrows,ncols,nnz,aijseq->i,aijseq->j,aijseq->a); 42 aijkok->nonzerostate = A->nonzerostate; 43 A->spptr = aijkok; 44 } else if (A->offloadmask == PETSC_OFFLOAD_CPU) { /* Copy values only */ 45 Kokkos::deep_copy(aijkok->a_d,aijkok->a_h); 46 } 47 A->offloadmask = PETSC_OFFLOAD_BOTH; 48 PetscFunctionReturn(0); 49 } 50 51 // MatSeqAIJKokkosSetDeviceMat takes a PetscSplitCSRDataStructure with device data and copies it to the device. Note, "deep_copy" here is really a shallow copy 52 PETSC_EXTERN PetscErrorCode MatSeqAIJKokkosSetDeviceMat(Mat A, PetscSplitCSRDataStructure *h_mat) 53 { 54 Mat_SeqAIJKokkos *aijkok; 55 Kokkos::View<PetscSplitCSRDataStructure, Kokkos::HostSpace> h_mat_k(h_mat); 56 57 PetscFunctionBegin; 58 // ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 59 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 60 if (!aijkok) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"no Mat_SeqAIJKokkos"); 61 aijkok->device_mat_d = create_mirror(DeviceMemorySpace(),h_mat_k); 62 Kokkos::deep_copy (aijkok->device_mat_d, h_mat_k); 63 PetscFunctionReturn(0); 64 } 65 66 // MatSeqAIJKokkosGetDeviceMat gets the device if it is here, otherwise it creates a place for it and returns NULL 67 PETSC_EXTERN PetscErrorCode MatSeqAIJKokkosGetDeviceMat(Mat A, PetscSplitCSRDataStructure **d_mat) 68 { 69 Mat_SeqAIJKokkos *aijkok; 70 71 PetscFunctionBegin; 72 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 73 if (aijkok && aijkok->device_mat_d.data()) { 74 *d_mat = aijkok->device_mat_d.data(); 75 } else { 76 PetscErrorCode ierr; 77 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); // create aijkok (we are making d_mat now so make a place for it) 78 *d_mat = NULL; 79 } 80 PetscFunctionReturn(0); 81 } 82 83 /* y = A x */ 84 static PetscErrorCode MatMult_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 85 { 86 PetscErrorCode ierr; 87 Mat_SeqAIJKokkos *aijkok; 88 ConstPetscScalarViewDevice_t xv; 89 PetscScalarViewDevice_t yv; 90 91 PetscFunctionBegin; 92 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 93 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 94 ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 95 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 96 KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A x + beta y */ 97 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 98 ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 99 /* 2.0*aijkok->csr.nnz()-aijkok->csr.numRows() seems more accurate here but assumes there are no zero-rows. So a little sloopy here. */ 100 ierr = WaitForKokkos();CHKERRQ(ierr); 101 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 /* y = A^T x */ 106 static PetscErrorCode MatMultTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 107 { 108 PetscErrorCode ierr; 109 Mat_SeqAIJKokkos *aijkok; 110 ConstPetscScalarViewDevice_t xv; 111 PetscScalarViewDevice_t yv; 112 113 PetscFunctionBegin; 114 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 115 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 116 ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 117 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 118 KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^T x + beta y */ 119 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 120 ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 121 ierr = WaitForKokkos();CHKERRQ(ierr); 122 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 /* y = A^H x */ 127 static PetscErrorCode MatMultHermitianTranspose_SeqAIJKokkos(Mat A,Vec xx,Vec yy) 128 { 129 PetscErrorCode ierr; 130 Mat_SeqAIJKokkos *aijkok; 131 ConstPetscScalarViewDevice_t xv; 132 PetscScalarViewDevice_t yv; 133 134 PetscFunctionBegin; 135 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 136 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 137 ierr = VecKokkosGetDeviceView(yy,&yv);CHKERRQ(ierr); 138 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 139 KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,0.0/*beta*/,yv); /* y = alpha A^H x + beta y */ 140 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 141 ierr = VecKokkosRestoreDeviceView(yy,&yv);CHKERRQ(ierr); 142 ierr = WaitForKokkos();CHKERRQ(ierr); 143 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /* z = A x + y */ 148 static PetscErrorCode MatMultAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy, Vec zz) 149 { 150 PetscErrorCode ierr; 151 Mat_SeqAIJKokkos *aijkok; 152 ConstPetscScalarViewDevice_t xv,yv; 153 PetscScalarViewDevice_t zv; 154 155 PetscFunctionBegin; 156 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 157 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 158 ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 159 ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 160 if (zz != yy) Kokkos::deep_copy(zv,yv); 161 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 162 KokkosSparse::spmv("N",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A x + beta z */ 163 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 164 ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 165 ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 166 ierr = WaitForKokkos();CHKERRQ(ierr); 167 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 /* z = A^T x + y */ 172 static PetscErrorCode MatMultTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 173 { 174 PetscErrorCode ierr; 175 Mat_SeqAIJKokkos *aijkok; 176 ConstPetscScalarViewDevice_t xv,yv; 177 PetscScalarViewDevice_t zv; 178 179 PetscFunctionBegin; 180 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 181 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 182 ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 183 ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 184 if (zz != yy) Kokkos::deep_copy(zv,yv); 185 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 186 KokkosSparse::spmv("T",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^T x + beta z */ 187 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 188 ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 189 ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 190 ierr = WaitForKokkos();CHKERRQ(ierr); 191 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 /* z = A^H x + y */ 196 static PetscErrorCode MatMultHermitianTransposeAdd_SeqAIJKokkos(Mat A,Vec xx,Vec yy,Vec zz) 197 { 198 PetscErrorCode ierr; 199 Mat_SeqAIJKokkos *aijkok; 200 ConstPetscScalarViewDevice_t xv,yv; 201 PetscScalarViewDevice_t zv; 202 203 PetscFunctionBegin; 204 ierr = MatSeqAIJKokkosSyncDevice(A);CHKERRQ(ierr); 205 ierr = VecKokkosGetDeviceViewRead(xx,&xv);CHKERRQ(ierr); 206 ierr = VecKokkosGetDeviceViewRead(yy,&yv);CHKERRQ(ierr); 207 ierr = VecKokkosGetDeviceView(zz,&zv);CHKERRQ(ierr); 208 if (zz != yy) Kokkos::deep_copy(zv,yv); 209 aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 210 KokkosSparse::spmv("C",1.0/*alpha*/,aijkok->csr,xv,1.0/*beta*/,zv); /* z = alpha A^H x + beta z */ 211 ierr = VecKokkosRestoreDeviceViewRead(xx,&xv);CHKERRQ(ierr); 212 ierr = VecKokkosRestoreDeviceViewRead(yy,&yv);CHKERRQ(ierr); 213 ierr = VecKokkosRestoreDeviceView(zz,&zv);CHKERRQ(ierr); 214 ierr = WaitForKokkos();CHKERRQ(ierr); 215 ierr = PetscLogGpuFlops(2.0*aijkok->csr.nnz());CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJKokkos(Mat A, MatType mtype, MatReuse reuse, Mat* newmat) 220 { 221 PetscErrorCode ierr; 222 Mat B; 223 Mat_SeqAIJ *aij; 224 225 PetscFunctionBegin; 226 if (reuse == MAT_INITIAL_MATRIX) { /* Build a new mat */ 227 ierr = MatDuplicate(A,MAT_COPY_VALUES,newmat);CHKERRQ(ierr); 228 } else if (reuse == MAT_REUSE_MATRIX) { /* Reuse the mat created before */ 229 ierr = MatCopy(A,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 230 } 231 232 B = *newmat; 233 ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr); 234 ierr = PetscStrallocpy(VECKOKKOS,&B->defaultvectype);CHKERRQ(ierr); 235 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJKOKKOS);CHKERRQ(ierr); 236 ierr = MatSetOps_SeqAIJKokkos(B);CHKERRQ(ierr); 237 /* TODO: see ViennaCL and CUSPARSE once we have a BindToCPU? */ 238 aij = (Mat_SeqAIJ*)B->data; 239 aij->inode.use = PETSC_FALSE; 240 241 B->offloadmask = PETSC_OFFLOAD_CPU; 242 PetscFunctionReturn(0); 243 } 244 245 static PetscErrorCode MatDuplicate_SeqAIJKokkos(Mat A,MatDuplicateOption cpvalues,Mat *B) 246 { 247 PetscErrorCode ierr; 248 249 PetscFunctionBegin; 250 ierr = MatDuplicate_SeqAIJ(A,cpvalues,B);CHKERRQ(ierr); 251 ierr = MatConvert_SeqAIJ_SeqAIJKokkos(*B,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,B);CHKERRQ(ierr); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatDestroy_SeqAIJKokkos(Mat A) 256 { 257 PetscErrorCode ierr; 258 Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 259 260 PetscFunctionBegin; 261 if (aijkok && aijkok->device_mat_d.data()) { 262 delete aijkok->colmap_d; 263 delete aijkok->i_uncompressed_d; 264 } 265 delete aijkok; 266 ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 267 PetscFunctionReturn(0); 268 } 269 270 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJKokkos(Mat A) 271 { 272 PetscErrorCode ierr; 273 274 PetscFunctionBegin; 275 ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 276 ierr = MatCreate_SeqAIJ(A);CHKERRQ(ierr); 277 ierr = MatConvert_SeqAIJ_SeqAIJKokkos(A,MATSEQAIJKOKKOS,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 static PetscErrorCode MatSetValues_SeqAIJKokkos(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 282 { 283 PetscErrorCode ierr; 284 Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 285 PetscFunctionBegin; 286 if (aijkok && aijkok->device_mat_d.data()) { 287 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Mixing GPU and non-GPU assembly not supported"); 288 } 289 ierr = MatSetValues_SeqAIJ(A,m,im,n,in,v,is);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 static PetscErrorCode MatZeroEntries_SeqAIJKokkos(Mat A) 294 { 295 PetscErrorCode ierr; 296 PetscBool both = PETSC_FALSE; 297 Mat_SeqAIJKokkos *aijkok = static_cast<Mat_SeqAIJKokkos*>(A->spptr); 298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 299 300 PetscFunctionBegin; 301 if (aijkok && aijkok->a_d.data()) { 302 Kokkos::parallel_for (aijkok->a_d.size(), KOKKOS_LAMBDA (int i) { aijkok->a_d(i) = 0; }); 303 both = PETSC_TRUE; 304 } 305 ierr = PetscArrayzero(a->a,a->i[A->rmap->n]);CHKERRQ(ierr); 306 ierr = MatSeqAIJInvalidateDiagonal(A);CHKERRQ(ierr); 307 if (both) A->offloadmask = PETSC_OFFLOAD_BOTH; 308 else A->offloadmask = PETSC_OFFLOAD_CPU; 309 310 PetscFunctionReturn(0); 311 } 312 313 static PetscErrorCode MatAXPY_SeqAIJKokkos(Mat Y,PetscScalar a,Mat X,MatStructure str) // put axpy in aijcusparse, etc. 314 { 315 PetscErrorCode ierr; 316 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data,*y = (Mat_SeqAIJ*)Y->data; 317 PetscBool flgx,flgy; 318 319 PetscFunctionBegin; 320 if (a == (PetscScalar)0.0) PetscFunctionReturn(0); 321 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 322 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 323 ierr = PetscObjectTypeCompare((PetscObject)Y,MATSEQAIJKOKKOS,&flgy);CHKERRQ(ierr); 324 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQAIJKOKKOS,&flgx);CHKERRQ(ierr); 325 if (!flgx || !flgy) { 326 ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329 if (str == DIFFERENT_NONZERO_PATTERN) { 330 if (x->nz == y->nz) { 331 PetscBool e; 332 ierr = PetscArraycmp(x->i,y->i,Y->rmap->n+1,&e);CHKERRQ(ierr); 333 if (e) { 334 ierr = PetscArraycmp(x->j,y->j,y->nz,&e);CHKERRQ(ierr); 335 if (e) { 336 str = SAME_NONZERO_PATTERN; 337 } 338 } 339 } 340 } 341 if (str != SAME_NONZERO_PATTERN) { 342 ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } else { 345 if (Y->offloadmask==PETSC_OFFLOAD_CPU && X->offloadmask==PETSC_OFFLOAD_CPU) { 346 ierr = MatAXPY_SeqAIJ( Y, a, X, str);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } else if ((Y->offloadmask==PETSC_OFFLOAD_GPU || Y->offloadmask==PETSC_OFFLOAD_BOTH) && X->offloadmask == PETSC_OFFLOAD_CPU) { 349 ierr = MatSeqAIJKokkosSyncDevice(X);CHKERRQ(ierr); 350 } else if ((X->offloadmask==PETSC_OFFLOAD_GPU || X->offloadmask==PETSC_OFFLOAD_BOTH) && Y->offloadmask == PETSC_OFFLOAD_CPU) { 351 ierr = MatSeqAIJKokkosSyncDevice(Y);CHKERRQ(ierr); /* promote Y */ 352 } 353 { 354 Mat_SeqAIJKokkos *aijkokY = static_cast<Mat_SeqAIJKokkos*>(Y->spptr); 355 Mat_SeqAIJKokkos *aijkokX = static_cast<Mat_SeqAIJKokkos*>(X->spptr); 356 if (aijkokY && aijkokX && aijkokY->a_d.data() && aijkokX->a_d.data()) { 357 Kokkos::parallel_for (aijkokY->a_d.size(), KOKKOS_LAMBDA (int i) { aijkokY->a_d(i) += a*aijkokX->a_d(i); }); 358 Kokkos::deep_copy(aijkokY->a_h,aijkokY->a_d); // we could not copy and keep GPU 359 Y->offloadmask = PETSC_OFFLOAD_BOTH; 360 ierr = PetscLogGpuFlops(2.0*aijkokY->a_d.size());CHKERRQ(ierr); 361 } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"no Mat_SeqAIJKokkos ???"); 362 } 363 } 364 PetscFunctionReturn(0); 365 } 366 367 static PetscErrorCode MatSetOps_SeqAIJKokkos(Mat A) 368 { 369 PetscFunctionBegin; 370 A->ops->setvalues = MatSetValues_SeqAIJKokkos; /* protect with DEBUG, but MatSeqAIJSetTotalPreallocation defeats this ??? */ 371 A->ops->assemblyend = MatAssemblyEnd_SeqAIJKokkos; 372 A->ops->destroy = MatDestroy_SeqAIJKokkos; 373 A->ops->duplicate = MatDuplicate_SeqAIJKokkos; 374 375 A->ops->axpy = MatAXPY_SeqAIJKokkos; 376 A->ops->zeroentries = MatZeroEntries_SeqAIJKokkos; 377 A->ops->mult = MatMult_SeqAIJKokkos; 378 A->ops->multadd = MatMultAdd_SeqAIJKokkos; 379 A->ops->multtranspose = MatMultTranspose_SeqAIJKokkos; 380 A->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJKokkos; 381 A->ops->multhermitiantranspose = MatMultHermitianTranspose_SeqAIJKokkos; 382 A->ops->multhermitiantransposeadd = MatMultHermitianTransposeAdd_SeqAIJKokkos; 383 PetscFunctionReturn(0); 384 } 385 386 /* --------------------------------------------------------------------------------*/ 387 /*C 388 MatCreateSeqAIJKokkos - Creates a sparse matrix in AIJ (compressed row) format 389 (the default parallel PETSc format). This matrix will ultimately be handled by 390 Kokkos for calculations. For good matrix 391 assembly performance the user should preallocate the matrix storage by setting 392 the parameter nz (or the array nnz). By setting these parameters accurately, 393 performance during matrix assembly can be increased by more than a factor of 50. 394 395 Collective 396 397 Input Parameters: 398 + comm - MPI communicator, set to PETSC_COMM_SELF 399 . m - number of rows 400 . n - number of columns 401 . nz - number of nonzeros per row (same for all rows) 402 - nnz - array containing the number of nonzeros in the various rows 403 (possibly different for each row) or NULL 404 405 Output Parameter: 406 . A - the matrix 407 408 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 409 MatXXXXSetPreallocation() paradgm instead of this routine directly. 410 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 411 412 Notes: 413 If nnz is given then nz is ignored 414 415 The AIJ format (also called the Yale sparse matrix format or 416 compressed row storage), is fully compatible with standard Fortran 77 417 storage. That is, the stored row and column indices can begin at 418 either one (as in Fortran) or zero. See the users' manual for details. 419 420 Specify the preallocated storage with either nz or nnz (not both). 421 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 422 allocation. For large problems you MUST preallocate memory or you 423 will get TERRIBLE performance, see the users' manual chapter on matrices. 424 425 By default, this format uses inodes (identical nodes) when possible, to 426 improve numerical efficiency of matrix-vector products and solves. We 427 search for consecutive rows with the same nonzero structure, thereby 428 reusing matrix information to achieve increased efficiency. 429 430 Level: intermediate 431 432 .seealso: MatCreate(), MatCreateAIJ(), MatSetValues(), MatSeqAIJSetColumnIndices(), MatCreateSeqAIJWithArrays(), MatCreateAIJ(), MATSeqAIJKokkos, MATAIJKOKKOS 433 @*/ 434 PetscErrorCode MatCreateSeqAIJKokkos(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 435 { 436 PetscErrorCode ierr; 437 438 PetscFunctionBegin; 439 ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); 440 ierr = MatCreate(comm,A);CHKERRQ(ierr); 441 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 442 ierr = MatSetType(*A,MATSEQAIJKOKKOS);CHKERRQ(ierr); 443 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446