Changeset 879


Ignore:
Timestamp:
Aug 29, 2011, 7:03:07 PM (8 years ago)
Author:
sam
Message:

core: tune the half precision code so that the best variants are being
used on the PS3 platform.

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/half.cpp

    r877 r879  
    7575    /* If zero, or denormal, or exponent underflows too much for a denormal,
    7676     * return signed zero. */
     77#if !defined __CELLOS_LV2__
    7778    if (e < 103)
    7879        return bits;
     80#else
     81    /* PS3 don't know bout my denormals */
     82    if (e < 113)
     83        return bits;
     84#endif
    7985
    8086    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
     
    8894    }
    8995
     96#if !defined __CELLOS_LV2__
    9097    /* If exponent underflows but not too much, return a denormal */
    9198    if (e < 113)
     
    97104        return bits;
    98105    }
     106#endif
    99107
    100108    bits |= ((e - 112) << 10) | (m >> 1);
     
    173181    if (e == 0)
    174182    {
     183#if !defined __CELLOS_LV2__
    175184        uint32_t v = m | (m >> 1);
    176185        v |= v >> 2;
     
    183192         * added to our underestimated exponent. */
    184193        return s | (((125 - e) << 23) + (m << e));
     194#else
     195        /* PS3 don't know bout my denormals */
     196        return s;
     197#endif
    185198    }
    186199
     
    203216{
    204217    union { float f; uint32_t x; } u = { f };
     218#if !defined __CELLOS_LV2__
    205219    return makebits(float_to_half_nobranch(u.x));
     220#else
     221    /* This code is slightly faster on the PS3, mostly because we
     222     * don't need to care about denormals. */
     223    return makebits(float_to_half_branch(u.x));
     224#endif
    206225}
    207226
    208227/* Constructor from float with better precision. */
    209 half half::makeslow(float f)
     228half half::makeaccurate(float f)
    210229{
    211230    union { float f; uint32_t x; } u = { f };
     
    230249        union { float f; uint32_t x; } u;
    231250        u.f = *src++;
     251#if !defined __CELLOS_LV2__
    232252        *dst++ = makebits(float_to_half_nobranch(u.x));
     253#else
     254        /* This code is slightly faster on the PS3, mostly because we
     255         * don't need to care about denormals. */
     256        *dst++ = makebits(float_to_half_branch(u.x));
     257#endif
    233258    }
    234259
     
    241266    {
    242267        union { float f; uint32_t x; } u;
     268#if !defined __CELLOS_LV2__
     269        /* This code is really too slow on the PS3, even with the denormal
     270         * handling stripped off. */
    243271        u.x = half_to_float_nobranch((*src++).bits);
     272#else
     273        u.x = half_to_float_branch((*src++).bits);
     274#endif
    244275        *dst++ = u.f;
    245276    }
  • trunk/src/half.h

    r877 r879  
    8080
    8181    /* Factories */
    82     static half makeslow(float f);
    8382    static half makefast(float f);
     83    static half makeaccurate(float f);
    8484    static inline half makebits(uint16_t x)
    8585    {
  • trunk/test/half.cpp

    r877 r879  
    3535    CPPUNIT_TEST_SUITE(HalfTest);
    3636    CPPUNIT_TEST(test_half_from_float);
    37     CPPUNIT_TEST(test_half_makeslow);
     37    CPPUNIT_TEST(test_half_makeaccurate);
    3838    CPPUNIT_TEST(test_half_makebits);
    3939    CPPUNIT_TEST(test_half_is_nan);
     
    6767    }
    6868
    69     void test_half_makeslow()
     69    void test_half_makeaccurate()
    7070    {
    7171        for (size_t i = 0; i < sizeof(pairs) / sizeof(*pairs); i++)
    7272        {
    73             half a = half::makeslow(pairs[i].f);
     73            half a = half::makeaccurate(pairs[i].f);
    7474            uint16_t b = pairs[i].x;
    7575            CPPUNIT_ASSERT_EQUAL(a.bits, b);
  • trunk/test/lol-bench.cpp

    r877 r879  
    6767        timer.GetMs();
    6868        half::convert(pf, ph, HALF_TABLE_SIZE);
    69         result[1] += timer.GetMs();
     69        result[0] += timer.GetMs();
    7070
    7171        /* Convert half to float (fast) */
     
    7373        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
    7474            pf[i] = (float)ph[i];
    75         result[0] += timer.GetMs();
     75        result[1] += timer.GetMs();
    7676
    7777        /* Convert float to half (array) */
    7878        timer.GetMs();
    7979        half::convert(ph, pf, HALF_TABLE_SIZE);
    80         result[4] += timer.GetMs();
     80        result[2] += timer.GetMs();
    8181
    8282        /* Convert float to half (fast) */
     
    8484        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
    8585            ph[i] = (half)pf[i];
    86         result[2] += timer.GetMs();
     86        result[3] += timer.GetMs();
    8787
    88         /* Convert float to half (slow) */
     88        /* Convert float to half (accurate) */
    8989        timer.GetMs();
    9090        for (size_t i = 0; i < HALF_TABLE_SIZE; i++)
    91             ph[i] = half::makeslow(pf[i]);
    92         result[3] += timer.GetMs();
     91            ph[i] = half::makeaccurate(pf[i]);
     92        result[4] += timer.GetMs();
    9393
    9494        /* Change sign of every half */
     
    117117        result[i] *= 1000000.0f / (HALF_TABLE_SIZE * HALF_RUNS);
    118118
    119     Log::Info("                         ns/elem\n");
    120     Log::Info("float = half            %7.3f\n", result[0]);
    121     Log::Info("float[] = half[]        %7.3f\n", result[1]);
    122     Log::Info("half = float            %7.3f\n", result[2]);
    123     Log::Info("half = makeslow(float)  %7.3f\n", result[3]);
    124     Log::Info("half[] = float[]        %7.3f\n", result[4]);
    125     Log::Info("half = -half            %7.3f\n", result[5]);
    126     Log::Info("float += half           %7.3f\n", result[6]);
    127     Log::Info("half += float           %7.3f\n", result[7]);
     119    Log::Info("                          ns/elem\n");
     120    Log::Info("float[] = half[]         %7.3f\n", result[0]);
     121    Log::Info("float = half             %7.3f\n", result[1]);
     122    Log::Info("half[] = float[]         %7.3f\n", result[2]);
     123    Log::Info("half = float (fast)      %7.3f\n", result[3]);
     124    Log::Info("half = float (accurate)  %7.3f\n", result[4]);
     125    Log::Info("half = -half             %7.3f\n", result[5]);
     126    Log::Info("float += half            %7.3f\n", result[6]);
     127    Log::Info("half += float            %7.3f\n", result[7]);
    128128}
    129129
Note: See TracChangeset for help on using the changeset viewer.