Changeset 882


Ignore:
Timestamp: Aug 30, 2011, 7:19:08 PM (8 years ago)
Author: sam
Message:

core: reactivate half denormals for the PS3.

We know we will not have denormal floats on the PS3, but we should still
create denormal halves in case the other end (maybe the GPU?) knows how
to handle them.

Location: trunk/src
Files: 2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/half.cpp

    r879 r882  
    1111#if defined HAVE_CONFIG_H
    1212#   include "config.h"
     13#endif
     14
     15#if defined __CELLOS_LV2__
     16#   include <ppu_altivec_internals.h>
    1317#endif
    1418
     
    3842    static uint16_t const basetable[512] =
    3943    {
    40 #define S1(i) (((i) < 103) ? 0x0000: \
     44#define S1(i) (((i) < 103) ? 0x0000 : \
    4145               ((i) < 113) ? 0x0400 >> (113 - (i)) : \
    4246               ((i) < 143) ? ((i) - 112) << 10 : 0x7c00)
     
    4448#undef S1
    4549#define S1(i) (0x8000 | (((i) < 103) ? 0x0000 : \
    46                          ((i) < 113) ? 0x0400 >> (113 - (i)): \
     50                         ((i) < 113) ? 0x0400 >> (113 - (i)) : \
    4751                         ((i) < 143) ? ((i) - 112) << 10 : 0x7c00))
    4852        S256(0),
     
    7377    unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */
    7478
    75     /* If zero, or denormal, or exponent underflows too much for a denormal,
    76      * return signed zero. */
    77 #if !defined __CELLOS_LV2__
     79    /* If zero, or denormal, or exponent underflows too much for a denormal
     80     * half, return signed zero. */
    7881    if (e < 103)
    7982        return bits;
    80 #else
    81     /* PS3 don't know bout my denormals */
    82     if (e < 113)
    83         return bits;
    84 #endif
    8583
    8684    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
     
    9492    }
    9593
    96 #if !defined __CELLOS_LV2__
    9794    /* If exponent underflows but not too much, return a denormal */
    9895    if (e < 113)
     
    104101        return bits;
    105102    }
    106 #endif
    107103
    108104    bits |= ((e - 112) << 10) | (m >> 1);
     
    112108    return bits;
    113109}
     110
     111#if 0
     112static inline void float_to_half_vector(half *dst, float const *src)
     113{
     114    vector unsigned int const v7 = vec_splat_u32(7);
     115    vector unsigned short const v6 = vec_splat_u16(6);
     116#if _XBOX
     117    vector signed short const v9 = vec_splat_u16(9);
     118    vector unsigned short const v10 = vec_splat_u16(10);
     119#else
     120    vector signed short const v0x0040 = {
     121        0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040, 0x0040};
     122    vector unsigned short const v0x0400 = {
     123        0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
     124#endif
     125    vector unsigned char const shuffle_high = {
     126        0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
     127    vector unsigned char const shuffle_low = {
     128        2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
     129    vector unsigned char const v0xbf70 = {
     130        0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70,
     131        0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70, 0xbf, 0x70};
     132
     133    vector unsigned short v_mant, v_ret;
     134    vector signed short v_exp;
     135    vector unsigned int in0 = (vector unsigned int)vec_ld(0, src);
     136    vector unsigned int in1 = (vector unsigned int)vec_ld(16, src);
     137
     138    in0 = vec_sra(in0, v7);
     139    in1 = vec_sra(in1, v7);
     140    v_exp = (vector signed short)vec_perm(in0, in1, shuffle_high);
     141    v_mant = (vector unsigned short)vec_perm(in0, in1, shuffle_low);
     142    v_exp = (vector signed short)vec_subs((vector unsigned char)v_exp, v0xbf70);
     143#if _XBOX
     144    v_ret = (vector unsigned short)vec_or(v_exp, vec_sr(v_exp, v9));
     145#else
     146    v_ret = (vector unsigned short)vec_madds(v_exp, v0x0040, v_exp);
     147#endif
     148    v_mant = vec_sr(v_mant, v6);
     149#if _XBOX
     150    v_ret = vec_or(v_mant, vec_sl(v_ret, v10));
     151#else
     152    v_ret = vec_mladd(v_ret, v0x0400, v_mant);
     153#endif
     154    vec_st(v_ret, 0, (uint16_t *)dst);
     155}
     156#endif
    114157
    115158static int const shifttable[32] =
     
    211254
    212255/* Constructor from float. Uses the non-branching version because benchmarks
    213  * indicate it is always twice as fast. The penalty of loading the lookup
    214  * tables does not seem important. */
     256 * indicate it is about 80% faster on amd64, and 20% faster on the PS3. The
     257 * penalty of loading the lookup tables does not seem important. */
    215258half half::makefast(float f)
    216259{
    217260    union { float f; uint32_t x; } u = { f };
    218 #if !defined __CELLOS_LV2__
    219261    return makebits(float_to_half_nobranch(u.x));
    220 #else
    221     /* This code is slightly faster on the PS3, mostly because we
    222      * don't need to care about denormals. */
    223     return makebits(float_to_half_branch(u.x));
    224 #endif
    225262}
    226263
     
    234271/* Cast to float. Uses the branching version because loading the tables
    235272 * for only one value is going to be cache-expensive. */
    236 half::operator float() const
    237 {
    238     /* FIXME: there is a hidden "this" in this method. Export more
    239      * code so that it can all work in registers instead. */
     273float half::tofloat(half h)
     274{
    240275    union { float f; uint32_t x; } u;
    241     u.x = half_to_float_branch(bits);
     276    u.x = half_to_float_branch(h.bits);
    242277    return u.f;
    243278}
     
    249284        union { float f; uint32_t x; } u;
    250285        u.f = *src++;
    251 #if !defined __CELLOS_LV2__
    252286        *dst++ = makebits(float_to_half_nobranch(u.x));
    253 #else
    254         /* This code is slightly faster on the PS3, mostly because we
    255          * don't need to care about denormals. */
    256         *dst++ = makebits(float_to_half_branch(u.x));
     287#if 0
     288        /* AltiVec code. Will work one day. */
     289        float_to_half_vector(dst, src);
     290        src += 8;
     291        dst += 8;
     292        i += 7;
    257293#endif
    258294    }
  • trunk/src/half.h

    r879 r882  
    5252
    5353    /* Cast to other types */
    54     operator float() const;
    55     inline operator int() const { return (int)(float)*this; }
     54    inline operator float() const { return tofloat(*this); }
     55    inline operator int() const { return (int)tofloat(*this); }
     56
     57    static float tofloat(half h);
    5658
    5759    /* Array conversions */
Note: See TracChangeset for help on using the changeset viewer.