1f17f7ee2SSatish Balay #include <petsc/private/matimpl.h> 2f17f7ee2SSatish Balay #include <petsc/private/vecimpl.h> 3f17f7ee2SSatish Balay #include <petscsf.h> 4f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 5f17f7ee2SSatish Balay #include <thrust/for_each.h> 6f17f7ee2SSatish Balay #include <thrust/device_vector.h> 7f17f7ee2SSatish Balay #include <thrust/execution_policy.h> 8f17f7ee2SSatish Balay #endif 9f17f7ee2SSatish Balay 10*0dd791a8SStefano Zampini PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf) 11d71ae5a4SJacob Faibussowitsch { 12f17f7ee2SSatish Balay PetscSF asf; 13f17f7ee2SSatish Balay 14f17f7ee2SSatish Balay PetscFunctionBegin; 15f17f7ee2SSatish Balay PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 16f17f7ee2SSatish Balay PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2); 174f572ea9SToby Isaac PetscAssertPointer(osf, 3); 18*0dd791a8SStefano Zampini PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf)); 19f17f7ee2SSatish Balay if (!asf) { 20f17f7ee2SSatish Balay PetscInt lda; 21f17f7ee2SSatish Balay 22f17f7ee2SSatish Balay PetscCall(MatDenseGetLDA(A, &lda)); 23*0dd791a8SStefano Zampini PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf)); 24*0dd791a8SStefano Zampini PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf)); 25f17f7ee2SSatish Balay PetscCall(PetscObjectDereference((PetscObject)asf)); 26f17f7ee2SSatish Balay } 27f17f7ee2SSatish Balay *osf = asf; 283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 29f17f7ee2SSatish Balay } 30f17f7ee2SSatish Balay 31f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 329371c9d4SSatish Balay struct SignVector_Functor { 33f17f7ee2SSatish Balay const PetscScalar *v; 34f17f7ee2SSatish Balay PetscScalar *s; 35f17f7ee2SSatish Balay SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { } 36f17f7ee2SSatish Balay 379371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); } 38f17f7ee2SSatish Balay }; 39f17f7ee2SSatish Balay #endif 40f17f7ee2SSatish Balay 41d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) 42d71ae5a4SJacob Faibussowitsch { 43f17f7ee2SSatish Balay const PetscScalar *av; 44f17f7ee2SSatish Balay PetscScalar *as; 45f17f7ee2SSatish Balay PetscInt i, n; 46f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 47f17f7ee2SSatish Balay PetscBool viscuda, siscuda; 48f17f7ee2SSatish Balay #endif 49f17f7ee2SSatish Balay 50f17f7ee2SSatish Balay PetscFunctionBegin; 51f17f7ee2SSatish Balay PetscValidHeaderSpecific(v, VEC_CLASSID, 1); 52f17f7ee2SSatish Balay PetscValidHeaderSpecific(s, VEC_CLASSID, 2); 53f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(s, &n)); 54f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(v, &i)); 55f17f7ee2SSatish Balay PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n); 56f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 57f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "")); 58f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "")); 59f17f7ee2SSatish Balay viscuda = (PetscBool)(viscuda && !v->boundtocpu); 60f17f7ee2SSatish Balay siscuda = (PetscBool)(siscuda && !s->boundtocpu); 61f17f7ee2SSatish Balay if (viscuda && siscuda) { 62f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayRead(v, &av)); 63f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(s, &as)); 64f17f7ee2SSatish Balay SignVector_Functor sign_vector(av, as); 659371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector); 66f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(s, &as)); 67f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayRead(v, &av)); 68f17f7ee2SSatish Balay } else 69f17f7ee2SSatish Balay #endif 70f17f7ee2SSatish Balay { 71f17f7ee2SSatish Balay PetscCall(VecGetArrayRead(v, &av)); 72f17f7ee2SSatish Balay PetscCall(VecGetArrayWrite(s, &as)); 73f17f7ee2SSatish Balay for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 74f17f7ee2SSatish Balay PetscCall(VecRestoreArrayWrite(s, &as)); 75f17f7ee2SSatish Balay PetscCall(VecRestoreArrayRead(v, &av)); 76f17f7ee2SSatish Balay } 773ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 78f17f7ee2SSatish Balay } 79f17f7ee2SSatish Balay 80f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 819371c9d4SSatish Balay struct StandardBasis_Functor { 82f17f7ee2SSatish Balay PetscScalar *v; 83f17f7ee2SSatish Balay PetscInt j; 84f17f7ee2SSatish Balay 85f17f7ee2SSatish Balay StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { } 869371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); } 87f17f7ee2SSatish Balay }; 88f17f7ee2SSatish Balay #endif 89f17f7ee2SSatish Balay 90d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) 91d71ae5a4SJacob Faibussowitsch { 92f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 93f17f7ee2SSatish Balay PetscBool iscuda; 94f17f7ee2SSatish Balay #endif 95f17f7ee2SSatish Balay PetscInt st, en; 96f17f7ee2SSatish Balay 97f17f7ee2SSatish Balay PetscFunctionBegin; 98f17f7ee2SSatish Balay PetscCall(VecGetOwnershipRange(x, &st, &en)); 99f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 100f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "")); 101f17f7ee2SSatish Balay iscuda = (PetscBool)(iscuda && !x->boundtocpu); 102f17f7ee2SSatish Balay if (iscuda) { 103f17f7ee2SSatish Balay PetscScalar *ax; 104f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(x, &ax)); 105f17f7ee2SSatish Balay StandardBasis_Functor delta(ax, i - st); 1069371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta); 107f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(x, &ax)); 108f17f7ee2SSatish Balay } else 109f17f7ee2SSatish Balay #endif 110f17f7ee2SSatish Balay { 111f17f7ee2SSatish Balay PetscCall(VecSet(x, 0.)); 11248a46eb9SPierre Jolivet if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES)); 113f17f7ee2SSatish Balay PetscCall(VecAssemblyBegin(x)); 114f17f7ee2SSatish Balay PetscCall(VecAssemblyEnd(x)); 115f17f7ee2SSatish Balay } 1163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 117f17f7ee2SSatish Balay } 118f17f7ee2SSatish Balay 119f17f7ee2SSatish Balay /* these are approximate norms */ 120f17f7ee2SSatish Balay /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 121f17f7ee2SSatish Balay NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 122d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n) 123d71ae5a4SJacob Faibussowitsch { 124f17f7ee2SSatish Balay Vec x, y, w, z; 125f17f7ee2SSatish Balay PetscReal normz, adot; 126f17f7ee2SSatish Balay PetscScalar dot; 127f17f7ee2SSatish Balay PetscInt i, j, N, jold = -1; 128f17f7ee2SSatish Balay PetscBool boundtocpu = PETSC_TRUE; 129f17f7ee2SSatish Balay 130f17f7ee2SSatish Balay PetscFunctionBegin; 131f17f7ee2SSatish Balay #if defined(PETSC_HAVE_DEVICE) 132f17f7ee2SSatish Balay boundtocpu = A->boundtocpu; 133f17f7ee2SSatish Balay #endif 134f17f7ee2SSatish Balay switch (normtype) { 135f17f7ee2SSatish Balay case NORM_INFINITY: 136f17f7ee2SSatish Balay case NORM_1: 137f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 10; /* pure guess */ 138f17f7ee2SSatish Balay if (normtype == NORM_INFINITY) { 139f17f7ee2SSatish Balay Mat B; 140f17f7ee2SSatish Balay PetscCall(MatCreateTranspose(A, &B)); 141f17f7ee2SSatish Balay A = B; 142f17f7ee2SSatish Balay } else { 143f17f7ee2SSatish Balay PetscCall(PetscObjectReference((PetscObject)A)); 144f17f7ee2SSatish Balay } 145f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y)); 146f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, &w)); 147f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu)); 148f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu)); 149f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu)); 150f17f7ee2SSatish Balay PetscCall(VecBindToCPU(w, boundtocpu)); 151f17f7ee2SSatish Balay PetscCall(VecGetSize(x, &N)); 152f17f7ee2SSatish Balay PetscCall(VecSet(x, 1. / N)); 153f17f7ee2SSatish Balay *n = 0.0; 154f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) { 155f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y)); 156f17f7ee2SSatish Balay PetscCall(VecSign(y, w)); 157f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, w, z)); 158f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_INFINITY, &normz)); 159f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot)); 160f17f7ee2SSatish Balay adot = PetscAbsScalar(dot); 161f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot)); 162f17f7ee2SSatish Balay if (normz <= adot && i > 0) { 163f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n)); 164f17f7ee2SSatish Balay break; 165f17f7ee2SSatish Balay } 166f17f7ee2SSatish Balay PetscCall(VecMax(z, &j, &normz)); 167f17f7ee2SSatish Balay if (j == jold) { 168f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n)); 169f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i)); 170f17f7ee2SSatish Balay break; 171f17f7ee2SSatish Balay } 172f17f7ee2SSatish Balay jold = j; 173f17f7ee2SSatish Balay PetscCall(VecSetDelta(x, j)); 174f17f7ee2SSatish Balay } 175f17f7ee2SSatish Balay PetscCall(MatDestroy(&A)); 176f17f7ee2SSatish Balay PetscCall(VecDestroy(&x)); 177f17f7ee2SSatish Balay PetscCall(VecDestroy(&w)); 178f17f7ee2SSatish Balay PetscCall(VecDestroy(&y)); 179f17f7ee2SSatish Balay PetscCall(VecDestroy(&z)); 180f17f7ee2SSatish Balay break; 181f17f7ee2SSatish Balay case NORM_2: 182f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 20; /* pure guess */ 183f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y)); 184f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, NULL)); 185f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu)); 186f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu)); 187f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu)); 188f17f7ee2SSatish Balay PetscCall(VecSetRandom(x, NULL)); 189f17f7ee2SSatish Balay PetscCall(VecNormalize(x, NULL)); 190f17f7ee2SSatish Balay *n = 0.0; 191f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) { 192f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y)); 193f17f7ee2SSatish Balay PetscCall(VecNormalize(y, n)); 194f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, y, z)); 195f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_2, &normz)); 196f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot)); 197f17f7ee2SSatish Balay adot = PetscAbsScalar(dot); 198f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot)); 199f17f7ee2SSatish Balay if (normz <= adot) break; 200f17f7ee2SSatish Balay if (i < normsamples - 1) { 201f17f7ee2SSatish Balay Vec t; 202f17f7ee2SSatish Balay 203f17f7ee2SSatish Balay PetscCall(VecNormalize(z, NULL)); 204f17f7ee2SSatish Balay t = x; 205f17f7ee2SSatish Balay x = z; 206f17f7ee2SSatish Balay z = t; 207f17f7ee2SSatish Balay } 208f17f7ee2SSatish Balay } 209f17f7ee2SSatish Balay PetscCall(VecDestroy(&x)); 210f17f7ee2SSatish Balay PetscCall(VecDestroy(&y)); 211f17f7ee2SSatish Balay PetscCall(VecDestroy(&z)); 212f17f7ee2SSatish Balay break; 213d71ae5a4SJacob Faibussowitsch default: 214d71ae5a4SJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]); 215f17f7ee2SSatish Balay } 216f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i)); 2173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 218f17f7ee2SSatish Balay } 219