1f17f7ee2SSatish Balay #include <petsc/private/matimpl.h> 2f17f7ee2SSatish Balay #include <petsc/private/vecimpl.h> 3f17f7ee2SSatish Balay #include <petscsf.h> 4f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 5f17f7ee2SSatish Balay #include <thrust/for_each.h> 6f17f7ee2SSatish Balay #include <thrust/device_vector.h> 7f17f7ee2SSatish Balay #include <thrust/execution_policy.h> 8f17f7ee2SSatish Balay #endif 9f17f7ee2SSatish Balay 109371c9d4SSatish Balay PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf) { 11f17f7ee2SSatish Balay PetscSF rankssf; 12f17f7ee2SSatish Balay const PetscSFNode *iremote; 13f17f7ee2SSatish Balay PetscSFNode *viremote, *rremotes; 14f17f7ee2SSatish Balay const PetscInt *ilocal; 15f17f7ee2SSatish Balay PetscInt *vilocal = NULL, *ldrs; 16f17f7ee2SSatish Balay const PetscMPIInt *ranks; 17f17f7ee2SSatish Balay PetscMPIInt *sranks; 18f17f7ee2SSatish Balay PetscInt nranks, nr, nl, vnr, vnl, i, v, j, maxl; 19f17f7ee2SSatish Balay MPI_Comm comm; 20f17f7ee2SSatish Balay 21f17f7ee2SSatish Balay PetscFunctionBegin; 22f17f7ee2SSatish Balay PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1); 23f17f7ee2SSatish Balay PetscValidLogicalCollectiveInt(sf, nv, 2); 24f17f7ee2SSatish Balay PetscValidPointer(vsf, 5); 25f17f7ee2SSatish Balay if (nv == 1) { 26f17f7ee2SSatish Balay PetscCall(PetscObjectReference((PetscObject)sf)); 27f17f7ee2SSatish Balay *vsf = sf; 28f17f7ee2SSatish Balay PetscFunctionReturn(0); 29f17f7ee2SSatish Balay } 30f17f7ee2SSatish Balay PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 31f17f7ee2SSatish Balay PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote)); 32f17f7ee2SSatish Balay PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl)); 33f17f7ee2SSatish Balay maxl += 1; 34f17f7ee2SSatish Balay if (ldl == PETSC_DECIDE) ldl = maxl; 35f17f7ee2SSatish Balay if (ldr == PETSC_DECIDE) ldr = nr; 36f17f7ee2SSatish Balay PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldr, nr); 37f17f7ee2SSatish Balay PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " < %" PetscInt_FMT, ldl, maxl); 38f17f7ee2SSatish Balay vnr = nr * nv; 39f17f7ee2SSatish Balay vnl = nl * nv; 40f17f7ee2SSatish Balay PetscCall(PetscMalloc1(vnl, &viremote)); 41f17f7ee2SSatish Balay if (ilocal) PetscCall(PetscMalloc1(vnl, &vilocal)); 42f17f7ee2SSatish Balay 43f17f7ee2SSatish Balay /* TODO: Should this special SF be available, e.g. 44f17f7ee2SSatish Balay PetscSFGetRanksSF or similar? */ 45f17f7ee2SSatish Balay PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL)); 46f17f7ee2SSatish Balay PetscCall(PetscMalloc1(nranks, &sranks)); 47f17f7ee2SSatish Balay PetscCall(PetscArraycpy(sranks, ranks, nranks)); 48f17f7ee2SSatish Balay PetscCall(PetscSortMPIInt(nranks, sranks)); 49f17f7ee2SSatish Balay PetscCall(PetscMalloc1(nranks, &rremotes)); 50f17f7ee2SSatish Balay for (i = 0; i < nranks; i++) { 51f17f7ee2SSatish Balay rremotes[i].rank = sranks[i]; 52f17f7ee2SSatish Balay rremotes[i].index = 0; 53f17f7ee2SSatish Balay } 54f17f7ee2SSatish Balay PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &rankssf)); 55f17f7ee2SSatish Balay PetscCall(PetscSFSetGraph(rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER)); 56f17f7ee2SSatish Balay PetscCall(PetscMalloc1(nranks, &ldrs)); 57f17f7ee2SSatish Balay PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 58f17f7ee2SSatish Balay PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE)); 59f17f7ee2SSatish Balay PetscCall(PetscSFDestroy(&rankssf)); 60f17f7ee2SSatish Balay 61f17f7ee2SSatish Balay j = -1; 62f17f7ee2SSatish Balay for (i = 0; i < nl; i++) { 63f17f7ee2SSatish Balay const PetscInt r = iremote[i].rank; 64f17f7ee2SSatish Balay const PetscInt ii = iremote[i].index; 65f17f7ee2SSatish Balay 66*48a46eb9SPierre Jolivet if (j < 0 || sranks[j] != r) PetscCall(PetscFindMPIInt(r, nranks, sranks, &j)); 67f17f7ee2SSatish Balay PetscCheck(j >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r); 68f17f7ee2SSatish Balay for (v = 0; v < nv; v++) { 69f17f7ee2SSatish Balay viremote[v * nl + i].rank = r; 70f17f7ee2SSatish Balay viremote[v * nl + i].index = v * ldrs[j] + ii; 71f17f7ee2SSatish Balay if (ilocal) vilocal[v * nl + i] = v * ldl + ilocal[i]; 72f17f7ee2SSatish Balay } 73f17f7ee2SSatish Balay } 74f17f7ee2SSatish Balay PetscCall(PetscFree(sranks)); 75f17f7ee2SSatish Balay PetscCall(PetscFree(ldrs)); 76f17f7ee2SSatish Balay PetscCall(PetscSFCreate(comm, vsf)); 77f17f7ee2SSatish Balay PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER)); 78f17f7ee2SSatish Balay PetscFunctionReturn(0); 79f17f7ee2SSatish Balay } 80f17f7ee2SSatish Balay 819371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatDenseGetH2OpusVectorSF(Mat A, PetscSF h2sf, PetscSF *osf) { 82f17f7ee2SSatish Balay PetscSF asf; 83f17f7ee2SSatish Balay 84f17f7ee2SSatish Balay PetscFunctionBegin; 85f17f7ee2SSatish Balay PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 86f17f7ee2SSatish Balay PetscValidHeaderSpecific(h2sf, PETSCSF_CLASSID, 2); 87f17f7ee2SSatish Balay PetscValidPointer(osf, 3); 88f17f7ee2SSatish Balay PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_vectorsf", (PetscObject *)&asf)); 89f17f7ee2SSatish Balay if (!asf) { 90f17f7ee2SSatish Balay PetscInt lda; 91f17f7ee2SSatish Balay 92f17f7ee2SSatish Balay PetscCall(MatDenseGetLDA(A, &lda)); 93f17f7ee2SSatish Balay PetscCall(PetscSFGetVectorSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf)); 94f17f7ee2SSatish Balay PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_vectorsf", (PetscObject)asf)); 95f17f7ee2SSatish Balay PetscCall(PetscObjectDereference((PetscObject)asf)); 96f17f7ee2SSatish Balay } 97f17f7ee2SSatish Balay *osf = asf; 98f17f7ee2SSatish Balay PetscFunctionReturn(0); 99f17f7ee2SSatish Balay } 100f17f7ee2SSatish Balay 101f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 1029371c9d4SSatish Balay struct SignVector_Functor { 103f17f7ee2SSatish Balay const PetscScalar *v; 104f17f7ee2SSatish Balay PetscScalar *s; 105f17f7ee2SSatish Balay SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) { } 106f17f7ee2SSatish Balay 1079371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { s[i] = (v[i] < 0 ? -1 : 1); } 108f17f7ee2SSatish Balay }; 109f17f7ee2SSatish Balay #endif 110f17f7ee2SSatish Balay 1119371c9d4SSatish Balay PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s) { 112f17f7ee2SSatish Balay const PetscScalar *av; 113f17f7ee2SSatish Balay PetscScalar *as; 114f17f7ee2SSatish Balay PetscInt i, n; 115f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 116f17f7ee2SSatish Balay PetscBool viscuda, siscuda; 117f17f7ee2SSatish Balay #endif 118f17f7ee2SSatish Balay 119f17f7ee2SSatish Balay PetscFunctionBegin; 120f17f7ee2SSatish Balay PetscValidHeaderSpecific(v, VEC_CLASSID, 1); 121f17f7ee2SSatish Balay PetscValidHeaderSpecific(s, VEC_CLASSID, 2); 122f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(s, &n)); 123f17f7ee2SSatish Balay PetscCall(VecGetLocalSize(v, &i)); 124f17f7ee2SSatish Balay PetscCheck(i == n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Invalid local sizes %" PetscInt_FMT " != %" PetscInt_FMT, i, n); 125f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 126f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)v, &viscuda, VECSEQCUDA, VECMPICUDA, "")); 127f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &siscuda, VECSEQCUDA, VECMPICUDA, "")); 128f17f7ee2SSatish Balay viscuda = (PetscBool)(viscuda && !v->boundtocpu); 129f17f7ee2SSatish Balay siscuda = (PetscBool)(siscuda && !s->boundtocpu); 130f17f7ee2SSatish Balay if (viscuda && siscuda) { 131f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayRead(v, &av)); 132f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(s, &as)); 133f17f7ee2SSatish Balay SignVector_Functor sign_vector(av, as); 1349371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(n), sign_vector); 135f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(s, &as)); 136f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayRead(v, &av)); 137f17f7ee2SSatish Balay } else 138f17f7ee2SSatish Balay #endif 139f17f7ee2SSatish Balay { 140f17f7ee2SSatish Balay PetscCall(VecGetArrayRead(v, &av)); 141f17f7ee2SSatish Balay PetscCall(VecGetArrayWrite(s, &as)); 142f17f7ee2SSatish Balay for (i = 0; i < n; i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.; 143f17f7ee2SSatish Balay PetscCall(VecRestoreArrayWrite(s, &as)); 144f17f7ee2SSatish Balay PetscCall(VecRestoreArrayRead(v, &av)); 145f17f7ee2SSatish Balay } 146f17f7ee2SSatish Balay PetscFunctionReturn(0); 147f17f7ee2SSatish Balay } 148f17f7ee2SSatish Balay 149f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 1509371c9d4SSatish Balay struct StandardBasis_Functor { 151f17f7ee2SSatish Balay PetscScalar *v; 152f17f7ee2SSatish Balay PetscInt j; 153f17f7ee2SSatish Balay 154f17f7ee2SSatish Balay StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { } 1559371c9d4SSatish Balay __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); } 156f17f7ee2SSatish Balay }; 157f17f7ee2SSatish Balay #endif 158f17f7ee2SSatish Balay 1599371c9d4SSatish Balay PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i) { 160f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 161f17f7ee2SSatish Balay PetscBool iscuda; 162f17f7ee2SSatish Balay #endif 163f17f7ee2SSatish Balay PetscInt st, en; 164f17f7ee2SSatish Balay 165f17f7ee2SSatish Balay PetscFunctionBegin; 166f17f7ee2SSatish Balay PetscCall(VecGetOwnershipRange(x, &st, &en)); 167f17f7ee2SSatish Balay #if defined(PETSC_HAVE_CUDA) 168f17f7ee2SSatish Balay PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, "")); 169f17f7ee2SSatish Balay iscuda = (PetscBool)(iscuda && !x->boundtocpu); 170f17f7ee2SSatish Balay if (iscuda) { 171f17f7ee2SSatish Balay PetscScalar *ax; 172f17f7ee2SSatish Balay PetscCall(VecCUDAGetArrayWrite(x, &ax)); 173f17f7ee2SSatish Balay StandardBasis_Functor delta(ax, i - st); 1749371c9d4SSatish Balay thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta); 175f17f7ee2SSatish Balay PetscCall(VecCUDARestoreArrayWrite(x, &ax)); 176f17f7ee2SSatish Balay } else 177f17f7ee2SSatish Balay #endif 178f17f7ee2SSatish Balay { 179f17f7ee2SSatish Balay PetscCall(VecSet(x, 0.)); 180*48a46eb9SPierre Jolivet if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES)); 181f17f7ee2SSatish Balay PetscCall(VecAssemblyBegin(x)); 182f17f7ee2SSatish Balay PetscCall(VecAssemblyEnd(x)); 183f17f7ee2SSatish Balay } 184f17f7ee2SSatish Balay PetscFunctionReturn(0); 185f17f7ee2SSatish Balay } 186f17f7ee2SSatish Balay 187f17f7ee2SSatish Balay /* these are approximate norms */ 188f17f7ee2SSatish Balay /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham 189f17f7ee2SSatish Balay NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */ 1909371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n) { 191f17f7ee2SSatish Balay Vec x, y, w, z; 192f17f7ee2SSatish Balay PetscReal normz, adot; 193f17f7ee2SSatish Balay PetscScalar dot; 194f17f7ee2SSatish Balay PetscInt i, j, N, jold = -1; 195f17f7ee2SSatish Balay PetscBool boundtocpu = PETSC_TRUE; 196f17f7ee2SSatish Balay 197f17f7ee2SSatish Balay PetscFunctionBegin; 198f17f7ee2SSatish Balay #if defined(PETSC_HAVE_DEVICE) 199f17f7ee2SSatish Balay boundtocpu = A->boundtocpu; 200f17f7ee2SSatish Balay #endif 201f17f7ee2SSatish Balay switch (normtype) { 202f17f7ee2SSatish Balay case NORM_INFINITY: 203f17f7ee2SSatish Balay case NORM_1: 204f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 10; /* pure guess */ 205f17f7ee2SSatish Balay if (normtype == NORM_INFINITY) { 206f17f7ee2SSatish Balay Mat B; 207f17f7ee2SSatish Balay PetscCall(MatCreateTranspose(A, &B)); 208f17f7ee2SSatish Balay A = B; 209f17f7ee2SSatish Balay } else { 210f17f7ee2SSatish Balay PetscCall(PetscObjectReference((PetscObject)A)); 211f17f7ee2SSatish Balay } 212f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y)); 213f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, &w)); 214f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu)); 215f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu)); 216f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu)); 217f17f7ee2SSatish Balay PetscCall(VecBindToCPU(w, boundtocpu)); 218f17f7ee2SSatish Balay PetscCall(VecGetSize(x, &N)); 219f17f7ee2SSatish Balay PetscCall(VecSet(x, 1. / N)); 220f17f7ee2SSatish Balay *n = 0.0; 221f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) { 222f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y)); 223f17f7ee2SSatish Balay PetscCall(VecSign(y, w)); 224f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, w, z)); 225f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_INFINITY, &normz)); 226f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot)); 227f17f7ee2SSatish Balay adot = PetscAbsScalar(dot); 228f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot)); 229f17f7ee2SSatish Balay if (normz <= adot && i > 0) { 230f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n)); 231f17f7ee2SSatish Balay break; 232f17f7ee2SSatish Balay } 233f17f7ee2SSatish Balay PetscCall(VecMax(z, &j, &normz)); 234f17f7ee2SSatish Balay if (j == jold) { 235f17f7ee2SSatish Balay PetscCall(VecNorm(y, NORM_1, n)); 236f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i)); 237f17f7ee2SSatish Balay break; 238f17f7ee2SSatish Balay } 239f17f7ee2SSatish Balay jold = j; 240f17f7ee2SSatish Balay PetscCall(VecSetDelta(x, j)); 241f17f7ee2SSatish Balay } 242f17f7ee2SSatish Balay PetscCall(MatDestroy(&A)); 243f17f7ee2SSatish Balay PetscCall(VecDestroy(&x)); 244f17f7ee2SSatish Balay PetscCall(VecDestroy(&w)); 245f17f7ee2SSatish Balay PetscCall(VecDestroy(&y)); 246f17f7ee2SSatish Balay PetscCall(VecDestroy(&z)); 247f17f7ee2SSatish Balay break; 248f17f7ee2SSatish Balay case NORM_2: 249f17f7ee2SSatish Balay if (normsamples < 0) normsamples = 20; /* pure guess */ 250f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &x, &y)); 251f17f7ee2SSatish Balay PetscCall(MatCreateVecs(A, &z, NULL)); 252f17f7ee2SSatish Balay PetscCall(VecBindToCPU(x, boundtocpu)); 253f17f7ee2SSatish Balay PetscCall(VecBindToCPU(y, boundtocpu)); 254f17f7ee2SSatish Balay PetscCall(VecBindToCPU(z, boundtocpu)); 255f17f7ee2SSatish Balay PetscCall(VecSetRandom(x, NULL)); 256f17f7ee2SSatish Balay PetscCall(VecNormalize(x, NULL)); 257f17f7ee2SSatish Balay *n = 0.0; 258f17f7ee2SSatish Balay for (i = 0; i < normsamples; i++) { 259f17f7ee2SSatish Balay PetscCall(MatMult(A, x, y)); 260f17f7ee2SSatish Balay PetscCall(VecNormalize(y, n)); 261f17f7ee2SSatish Balay PetscCall(MatMultTranspose(A, y, z)); 262f17f7ee2SSatish Balay PetscCall(VecNorm(z, NORM_2, &normz)); 263f17f7ee2SSatish Balay PetscCall(VecDot(x, z, &dot)); 264f17f7ee2SSatish Balay adot = PetscAbsScalar(dot); 265f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot)); 266f17f7ee2SSatish Balay if (normz <= adot) break; 267f17f7ee2SSatish Balay if (i < normsamples - 1) { 268f17f7ee2SSatish Balay Vec t; 269f17f7ee2SSatish Balay 270f17f7ee2SSatish Balay PetscCall(VecNormalize(z, NULL)); 271f17f7ee2SSatish Balay t = x; 272f17f7ee2SSatish Balay x = z; 273f17f7ee2SSatish Balay z = t; 274f17f7ee2SSatish Balay } 275f17f7ee2SSatish Balay } 276f17f7ee2SSatish Balay PetscCall(VecDestroy(&x)); 277f17f7ee2SSatish Balay PetscCall(VecDestroy(&y)); 278f17f7ee2SSatish Balay PetscCall(VecDestroy(&z)); 279f17f7ee2SSatish Balay break; 2809371c9d4SSatish Balay default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]); 281f17f7ee2SSatish Balay } 282f17f7ee2SSatish Balay PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i)); 283f17f7ee2SSatish Balay PetscFunctionReturn(0); 284f17f7ee2SSatish Balay } 285