source: trunk/test/tutorial/tut03.cpp @ 1214

Last change on this file since 1214 was 1214, checked in by sam, 8 years ago

win32: the uniform handling code was completely broken; we now properly
retrieve the constant's register index in the description table. This also
allows us to use a single uniform handle for both the vertex and pixel
shaders in a PS3 Cg program.

  • Property svn:keywords set to Id
File size: 33.4 KB
Line 
1//
2// Lol Engine - Fractal tutorial
3//
4// Copyright: (c) 2011 Sam Hocevar <sam@hocevar.net>
5//   This program is free software; you can redistribute it and/or
6//   modify it under the terms of the Do What The Fuck You Want To
7//   Public License, Version 2, as published by Sam Hocevar. See
8//   http://sam.zoy.org/projects/COPYING.WTFPL for more details.
9//
10
11#if defined HAVE_CONFIG_H
12#   include "config.h"
13#endif
14
15#include <cstring>
16#include <cmath>
17
18#include "core.h"
19#include "lolgl.h"
20#include "loldebug.h"
21
22using namespace std;
23using namespace lol;
24
25#if defined _WIN32 && defined USE_D3D9
26#   define FAR
27#   define NEAR
28#   include <d3d9.h>
29#endif
30
31#if USE_SDL && defined __APPLE__
32#   include <SDL_main.h>
33#endif
34
35#if defined _WIN32
36#   undef main /* FIXME: still needed? */
37#   include <direct.h>
38#endif
39
/* Platform-specific rendering declarations.
 *  - Direct3D 9 and Xbox builds only declare the external device pointer
 *    g_d3ddevice.
 *  - Every other build defines the three constants passed to the
 *    glTexImage2D / glTexSubImage2D calls in TickDraw: the internal
 *    format, the pixel format and the component type of the texture.
 * NOTE(review): the palette construction and the "inside the set" colour
 * pick their byte order per platform too, presumably to agree with these
 * formats -- keep them in sync when changing either. */
40#if defined USE_D3D9
41extern IDirect3DDevice9 *g_d3ddevice;
42#elif defined _XBOX
43extern D3DDevice *g_d3ddevice;
/* NOTE(review): this branch tests the macro's value rather than using
 * `defined` like the rest of the file; confirm that is intended. */
44#elif __CELLOS_LV2__
45static GLint const INTERNAL_FORMAT = GL_ARGB_SCE;
46static GLenum const TEXTURE_FORMAT = GL_BGRA;
47static GLenum const TEXTURE_TYPE = GL_UNSIGNED_INT_8_8_8_8_REV;
48#elif defined __native_client__
49static GLint const INTERNAL_FORMAT = GL_RGBA;
50static GLenum const TEXTURE_FORMAT = GL_RGBA;
51static GLenum const TEXTURE_TYPE = GL_UNSIGNED_BYTE;
52#else
53/* Seems efficient for little endian textures */
54static GLint const INTERNAL_FORMAT = GL_RGBA;
55static GLenum const TEXTURE_FORMAT = GL_BGRA;
56static GLenum const TEXTURE_TYPE = GL_UNSIGNED_INT_8_8_8_8_REV;
57#endif
58
59class Fractal : public WorldEntity
60{
61public:
62    Fractal(ivec2 const &size)
63    {
64        /* Ensure texture size is a multiple of 16 for better aligned
65         * data access. Store the dimensions of a texel for our shader,
66         * as well as the half-size of the screen. */
67        m_size = size;
68        m_size.x = (m_size.x + 15) & ~15;
69        m_size.y = (m_size.y + 15) & ~15;
70        /* FIXME: casts are necessary because of missing operators */
71        m_texel_settings = vec4(1.0, 1.0, 2.0, 2.0) / (vec4)m_size.xyxy;
72        m_screen_settings = vec4(1.0, 1.0, 0.5, 0.5) * (vec4)m_size.xyxy;
73
74        /* Window size decides the world aspect ratio. For instance, 640×480
75         * will be mapped to (-0.66,-0.5) - (0.66,0.5). */
76#if !defined __native_client__
77        m_window_size = Video::GetSize();
78#else
79        /* FIXME: it's illegal to call this on the game thread! */
80        m_window_size = ivec2(640, 480);
81#endif
82        if (m_window_size.y < m_window_size.x)
83            m_window2world = 0.5 / m_window_size.y;
84        else
85            m_window2world = 0.5 / m_window_size.x;
86        m_texel2world = (vec2)m_window_size / (vec2)m_size
87                                            * (vec2)m_window2world;
88
89        m_oldmouse = ivec2(0, 0);
90
91        m_pixels = new u8vec4[m_size.x * m_size.y];
92        m_tmppixels = new u8vec4[m_size.x / 2 * m_size.y / 2];
93        m_frame = -1;
94        m_slices = 4;
95        for (int i = 0; i < 4; i++)
96        {
97            m_deltashift[i] = 0.0;
98            m_deltascale[i] = 1.0;
99            m_dirty[i] = 2;
100        }
101#if defined __CELLOS_LV2__
102        //m_center = f64cmplx(-.22815528839841, -1.11514249704382);
103        //m_center = f64cmplx(0.001643721971153, 0.822467633298876);
104        m_center = f64cmplx(-0.65823419062254, 0.50221777363480);
105        m_zoom_speed = -0.000025;
106#else
107        m_center = -0.75;
108        m_zoom_speed = 0.0;
109#endif
110        m_translate = 0;
111        m_radius = 5.0;
112        m_ready = false;
113        m_drag = false;
114
115        m_palette = new u8vec4[(MAX_ITERATIONS + 1) * PALETTE_STEP];
116        for (int i = 0; i < (MAX_ITERATIONS + 1) * PALETTE_STEP; i++)
117        {
118            double f = (double)i / PALETTE_STEP;
119
120            double r = 0.5 * sin(f * 0.27 + 2.0) + 0.5;
121            double g = 0.5 * sin(f * 0.17 - 1.8) + 0.5;
122            double b = 0.5 * sin(f * 0.21 - 2.6) + 0.5;
123
124            if (f < 7.0)
125            {
126                f = f < 1.0 ? 0.0 : (f - 1.0) / 6.0;
127                r *= f;
128                g *= f;
129                b *= f;
130            }
131
132            uint8_t red = r * 255.99f;
133            uint8_t green = g * 255.99f;
134            uint8_t blue = b * 255.99f;
135#if defined __CELLOS_LV2__
136            m_palette[i] = u8vec4(255, red, green, blue);
137#elif defined __native_client__
138            m_palette[i] = u8vec4(red, green, blue, 255);
139#else
140            m_palette[i] = u8vec4(blue, green, red, 255);
141#endif
142        }
143
144#if !defined __native_client__
145        m_centertext = new Text(NULL, "gfx/font/ascii.png");
146        m_centertext->SetPos(ivec3(5, m_window_size.y - 15, 1));
147        Ticker::Ref(m_centertext);
148
149        m_mousetext = new Text(NULL, "gfx/font/ascii.png");
150        m_mousetext->SetPos(ivec3(5, m_window_size.y - 29, 1));
151        Ticker::Ref(m_mousetext);
152
153        m_zoomtext = new Text(NULL, "gfx/font/ascii.png");
154        m_zoomtext->SetPos(ivec3(5, m_window_size.y - 43, 1));
155        Ticker::Ref(m_zoomtext);
156#endif
157
158        position = ivec3(0, 0, 0);
159        bbox[0] = position;
160        bbox[1] = ivec3(m_window_size, 0);
161        Input::TrackMouse(this);
162
163        /* Spawn worker threads and wait for their readiness. */
164        for (int i = 0; i < MAX_THREADS; i++)
165            m_threads[i] = new Thread(DoWorkHelper, this);
166        for (int i = 0; i < MAX_THREADS; i++)
167            m_spawnqueue.Pop();
168    }
169
170    ~Fractal()
171    {
172        /* Signal worker threads for completion and wait for
173         * them to quit. */
174        for (int i = 0; i < MAX_THREADS; i++)
175            m_jobqueue.Push(-1);
176        for (int i = 0; i < MAX_THREADS; i++)
177            m_donequeue.Pop();
178
179        Input::UntrackMouse(this);
180#if !defined __native_client__
181        Ticker::Unref(m_centertext);
182        Ticker::Unref(m_mousetext);
183        Ticker::Unref(m_zoomtext);
184#endif
185        delete m_pixels;
186        delete m_tmppixels;
187        delete m_palette;
188    }
189
190    inline f64cmplx TexelToWorldOffset(vec2 texel)
191    {
192        double dx = (0.5 + texel.x - m_size.x / 2) * m_texel2world.x;
193        double dy = (0.5 + m_size.y / 2 - texel.y) * m_texel2world.y;
194        return m_radius * f64cmplx(dx, dy);
195    }
196
197    inline f64cmplx ScreenToWorldOffset(vec2 pixel)
198    {
199        /* No 0.5 offset here, because we want to be able to position the
200         * mouse at (0,0) exactly. */
201        double dx = pixel.x - m_window_size.x / 2;
202        double dy = m_window_size.y / 2 - pixel.y;
203        return m_radius * m_window2world * f64cmplx(dx, dy);
204    }
205
206    virtual void TickGame(float deltams)
207    {
208        WorldEntity::TickGame(deltams);
209
210        int prev_frame = m_frame;
211        m_frame = (m_frame + 1) % 4;
212
213        f64cmplx worldmouse = m_center + ScreenToWorldOffset(mousepos);
214
215        ivec3 buttons = Input::GetMouseButtons();
216#if !defined __CELLOS_LV2__
217        if (buttons[1])
218        {
219            if (!m_drag)
220            {
221                m_oldmouse = mousepos;
222                m_drag = true;
223            }
224            m_translate = ScreenToWorldOffset(m_oldmouse)
225                        - ScreenToWorldOffset(mousepos);
226            /* XXX: the purpose of this hack is to avoid translating by
227             * an exact number of pixels. If this were to happen, the step()
228             * optimisation for i915 cards in our shader would behave
229             * incorrectly because a quarter of the pixels in the image
230             * would have tie rankings in the distance calculation. */
231            m_translate *= 1023.0 / 1024.0;
232            m_oldmouse = mousepos;
233        }
234        else
235        {
236            m_drag = false;
237            if (m_translate != 0.0)
238            {
239                m_translate *= pow(2.0, -deltams * 0.005);
240                if (m_translate.norm() / m_radius < 1e-4)
241                    m_translate = 0.0;
242            }
243        }
244
245        if ((buttons[0] || buttons[2]) && mousepos.x != -1)
246        {
247            double zoom = buttons[0] ? -0.0005 : 0.0005;
248            m_zoom_speed += deltams * zoom;
249            if (m_zoom_speed / zoom > 5)
250                m_zoom_speed = 5 * zoom;
251        }
252        else if (m_zoom_speed)
253        {
254            m_zoom_speed *= pow(2.0, -deltams * 0.005);
255            if (abs(m_zoom_speed) < 1e-5 || m_drag)
256                m_zoom_speed = 0.0;
257        }
258#endif
259
260        if (m_zoom_speed || m_translate != 0.0)
261        {
262            f64cmplx oldcenter = m_center;
263            double oldradius = m_radius;
264            double zoom = pow(2.0, deltams * m_zoom_speed);
265            if (m_radius * zoom > 8.0)
266            {
267                m_zoom_speed *= -1.0;
268                zoom = 8.0 / m_radius;
269            }
270            else if (m_radius * zoom < 1e-14)
271            {
272                m_zoom_speed *= -1.0;
273                zoom = 1e-14 / m_radius;
274            }
275            m_radius *= zoom;
276#if !defined __CELLOS_LV2__
277            m_center += m_translate;
278            m_center = (m_center - worldmouse) * zoom + worldmouse;
279            worldmouse = m_center + ScreenToWorldOffset(mousepos);
280#endif
281
282            /* Store the transformation properties to go from m_frame - 1
283             * to m_frame. */
284            m_deltashift[prev_frame] = (m_center - oldcenter) / oldradius;
285            m_deltashift[prev_frame].x /= m_size.x * m_texel2world.x;
286            m_deltashift[prev_frame].y /= m_size.y * m_texel2world.y;
287            m_deltascale[prev_frame] = m_radius / oldradius;
288            m_dirty[0] = m_dirty[1] = m_dirty[2] = m_dirty[3] = 2;
289        }
290        else
291        {
292            /* If settings didn't change, set transformation from previous
293             * frame to identity. */
294            m_deltashift[prev_frame] = 0.0;
295            m_deltascale[prev_frame] = 1.0;
296        }
297
298        /* Transformation from current frame to current frame is always
299         * identity. */
300        m_zoom_settings[m_frame][0] = 0.0f;
301        m_zoom_settings[m_frame][1] = 0.0f;
302        m_zoom_settings[m_frame][2] = 1.0f;
303
304        /* Compute transformation from other frames to current frame */
305        for (int i = 0; i < 3; i++)
306        {
307            int prev_index = (m_frame + 4 - i) % 4;
308            int cur_index = (m_frame + 3 - i) % 4;
309
310            m_zoom_settings[cur_index][0] = m_zoom_settings[prev_index][0] * m_deltascale[cur_index] + m_deltashift[cur_index].x;
311            m_zoom_settings[cur_index][1] = m_zoom_settings[prev_index][1] * m_deltascale[cur_index] + m_deltashift[cur_index].y;
312            m_zoom_settings[cur_index][2] = m_zoom_settings[prev_index][2] * m_deltascale[cur_index];
313        }
314
315        /* Precompute texture offset change instead of doing it in GLSL */
316        for (int i = 0; i < 4; i++)
317        {
318            m_zoom_settings[i][0] += 0.5 * (1.0 - m_zoom_settings[i][2]);
319            m_zoom_settings[i][1] -= 0.5 * (1.0 - m_zoom_settings[i][2]);
320        }
321
322#if !defined __native_client__
323        char buf[128];
324        sprintf(buf, "center: %+16.14f%+16.14fi", m_center.x, m_center.y);
325        m_centertext->SetText(buf);
326        sprintf(buf, " mouse: %+16.14f%+16.14fi", worldmouse.x, worldmouse.y);
327        m_mousetext->SetText(buf);
328        sprintf(buf, "  zoom: %g", 1.0 / m_radius);
329        m_zoomtext->SetText(buf);
330#endif
331
332        if (m_dirty[m_frame])
333        {
334            m_dirty[m_frame]--;
335
336            for (int i = 0; i < m_size.y; i += MAX_LINES * 2)
337                m_jobqueue.Push(i);
338        }
339    }
340
341    static void *DoWorkHelper(void *data)
342    {
343        Fractal *that = (Fractal *)data;
344        that->m_spawnqueue.Push(0);
345        for ( ; ; )
346        {
347            int line = that->m_jobqueue.Pop();
348            if (line == -1)
349                break;
350            that->DoWork(line);
351            that->m_donequeue.Push(0);
352        }
353        that->m_donequeue.Push(0);
354        return NULL;
355    };
356
/* Compute one band of the current sub-frame.
 *
 * line: index of the first row of the band, as queued by TickGame
 *       (multiples of MAX_LINES * 2).
 *
 * Only every other row and every other column is visited; which parity
 * is used depends on m_frame, so the four sub-frames together cover all
 * texels. Results are written contiguously into the quarter of m_pixels
 * that belongs to m_frame. For each texel the iteration z <- z*z + r0 is
 * run (r0 being the texel's point in the complex plane) and the pixel
 * colour is looked up in m_palette from a smoothed iteration count;
 * points that never escape get a fixed colour. */
357    void DoWork(int line)
358    {
        /* Escape threshold on the squared modulus, and the scale that
         * turns the bit-extracted log2 estimate below into
         * log(n) / log(maxsqlen). */
359        double const maxsqlen = 1024;
360        double const k1 = 1.0 / (1 << 10) / (std::log(maxsqlen) / std::log(2.0));
361
        /* Row range of this band: the start parity is 0 for frames 0 and
         * 3, 1 for frames 1 and 2; the end is clamped to the texture
         * height. */
362        int jmin = ((m_frame + 1) % 4) / 2 + line;
363        int jmax = jmin + MAX_LINES * 2;
364        if (jmax > m_size.y)
365            jmax = m_size.y;
        /* Output cursor: sub-frame m_frame owns a quarter of m_pixels,
         * and this band starts line / 4 full-width rows into it. */
366        u8vec4 *m_pixelstart = m_pixels
367                             + m_size.x * (m_size.y / 4 * m_frame + line / 4);
368
369        for (int j = jmin; j < jmax; j += 2)
370        for (int i = m_frame % 2; i < m_size.x; i += 2)
371        {
372            f64cmplx z0 = m_center + TexelToWorldOffset(ivec2(i, j));
373            f64cmplx z1, z2, z3, r0 = z0;
374            //f64cmplx r0(0.28693186889504513, 0.014286693904085048);
375            //f64cmplx r0(0.001643721971153, 0.822467633298876);
376            //f64cmplx r0(-1.207205434596, 0.315432814901);
377            //f64cmplx r0(-0.79192956889854, -0.14632423080102);
378            //f64cmplx r0(0.3245046418497685, 0.04855101129280834);
            /* iter counts down in steps of four, one step per unrolled
             * group of iterations. */
379            int iter = MAX_ITERATIONS - 4;
380            for (;;)
381            {
382                /* Unroll the loop: tests are more expensive to do at each
383                 * iteration than the few extra multiplications. */
384                z1 = z0 * z0 + r0;
385                z2 = z1 * z1 + r0;
386                z3 = z2 * z2 + r0;
387                z0 = z3 * z3 + r0;
388                if (sqlen(z0) >= maxsqlen)
389                    break;
390                iter -= 4;
391                if (iter < 4)
392                    break;
393            }
394
            /* A non-zero counter is treated as "escaped".
             * NOTE(review): this assumes MAX_ITERATIONS is a multiple of
             * 4 so that a non-escaping point ends on exactly 0 -- TODO
             * confirm against its definition. */
395            if (iter)
396            {
397                double n = sqlen(z0);
398
                /* Find the earliest of the four unrolled steps that
                 * already exceeded the threshold and credit the
                 * iterations that were not needed. */
399                if (sqlen(z1) >= maxsqlen) { iter += 3; n = sqlen(z1); }
400                else if (sqlen(z2) >= maxsqlen) { iter += 2; n = sqlen(z2); }
401                else if (sqlen(z3) >= maxsqlen) { iter += 1; n = sqlen(z3); }
402
                /* Clamp so that the ratio computed below stays within
                 * [1,2]. */
403                if (n > maxsqlen * maxsqlen)
404                    n = maxsqlen * maxsqlen;
405
406                /* Approximate log(sqrt(n))/log(sqrt(maxsqlen)) */
                /* The shift keeps the top 22 bits of the double; after
                 * removing the exponent bias (1023 << 10) the result is
                 * a fixed-point estimate of 1024 * log2(n).
                 * NOTE(review): assumes IEEE-754 binary64 doubles and
                 * relies on reading the inactive union member. */
407                double f = iter;
408                union { double n; uint64_t x; } u = { n };
409                double k = (u.x >> 42) - (((1 << 10) - 1) << 10);
410                k *= k1;
411
412                /* Approximate log2(k) in [1,2]. */
413                f += (- 0.344847817623168308695977510213252644185 * k
414                      + 2.024664188044341212602376988171727038739) * k
415                      - 1.674876738008591047163498125918330313237;
416
417                *m_pixelstart++ = m_palette[(int)(f * PALETTE_STEP)];
418            }
419            else
420            {
                /* Point never escaped: fixed colour, with the component
                 * order chosen per platform. */
421#if defined __CELLOS_LV2__
422                *m_pixelstart++ = u8vec4(255, 0, 0, 0);
423#else
424                *m_pixelstart++ = u8vec4(0, 0, 0, 255);
425#endif
426            }
427        }
428    }
429
430    virtual void TickDraw(float deltams)
431    {
432        WorldEntity::TickDraw(deltams);
433
434        static float const vertices[] =
435        {
436             1.0f,  1.0f,
437            -1.0f,  1.0f,
438            -1.0f, -1.0f,
439            -1.0f, -1.0f,
440             1.0f, -1.0f,
441             1.0f,  1.0f,
442        };
443
444        static float const texcoords[] =
445        {
446             1.0f,  1.0f,
447             0.0f,  1.0f,
448             0.0f,  0.0f,
449             0.0f,  0.0f,
450             1.0f,  0.0f,
451             1.0f,  1.0f,
452        };
453
454        if (!m_ready)
455        {
456#if !defined __CELLOS_LV2__ && !defined _XBOX && !defined USE_D3D9
457            /* Create a texture of half the width and twice the height
458             * so that we can upload four different subimages each frame. */
459            glGenTextures(1, &m_texid);
460            glBindTexture(GL_TEXTURE_2D, m_texid);
461            glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT,
462                         m_size.x / 2, m_size.y * 2, 0,
463                         TEXTURE_FORMAT, TEXTURE_TYPE, m_pixels);
464#   if defined __CELLOS_LV2__
465            /* We need this hint because by default the storage type is
466             * GL_TEXTURE_SWIZZLED_GPU_SCE. */
467            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_ALLOCATION_HINT_SCE,
468                            GL_TEXTURE_TILED_GPU_SCE);
469#   endif
470            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
471            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
472#else
473            g_d3ddevice->CreateTexture(m_size.x / 2, m_size.y * 2, 1,
474                                       D3DUSAGE_DYNAMIC, D3DFMT_A8R8G8B8,
475                                       D3DPOOL_SYSTEMMEM, &m_tex, NULL);
476#endif
477
478            m_shader = Shader::Create(
479#if !defined __CELLOS_LV2__ && !defined _XBOX && !defined USE_D3D9
480#   if !defined HAVE_GLES_2X
481                "#version 120\n"
482#   else
483                "precision highp float;"
484#   endif
485                ""
486                "uniform mat4 u_ZoomSettings;"
487                "uniform vec4 u_TexelSize;"
488                "uniform vec4 u_ScreenSize;"
489                ""
490                "attribute vec2 a_TexCoord;"
491                "attribute vec2 a_Vertex;"
492                ""
493                "varying vec4 v_CenterX, v_CenterY, v_IndexX, v_IndexY;"
494                ""
495                "void main(void)"
496                "{"
497                "    gl_Position = vec4(a_Vertex, 0.0, 1.0);"
498                     /* Center point in [-.5,.5], apply zoom and translation
499                      * transformation, and go back to texture coordinates
500                      * in [0,1]. That's the ideal point we would like to
501                      * compute the value for. Then add or remove half the
502                      * size of a texel: the distance from this new point to
503                      * the final point will be our error. */
504                "    vec4 offsets = vec4(0.5, -0.5, 0.015625, -0.015625);"
505                "    vec4 zoomscale = vec4(u_ZoomSettings[0][2],"
506                "                          u_ZoomSettings[1][2],"
507                "                          u_ZoomSettings[2][2],"
508                "                          u_ZoomSettings[3][2]);"
509                "    vec4 zoomtx = vec4(u_ZoomSettings[0][0],"
510                "                       u_ZoomSettings[1][0],"
511                "                       u_ZoomSettings[2][0],"
512                "                       u_ZoomSettings[3][0]);"
513                "    vec4 zoomty = vec4(u_ZoomSettings[0][1],"
514                "                       u_ZoomSettings[1][1],"
515                "                       u_ZoomSettings[2][1],"
516                "                       u_ZoomSettings[3][1]);"
517                "    v_CenterX = zoomscale * a_TexCoord.x + zoomtx"
518                "              + offsets.xyxy * u_TexelSize.x;"
519                "    v_CenterY = zoomscale * a_TexCoord.y - zoomty"
520                "              + offsets.xyyx * u_TexelSize.y;"
521                     /* Precompute the multiple of one texel where our ideal
522                      * point lies. The fragment shader will call floor() on
523                      * this value. We add or remove a slight offset to avoid
524                      * rounding issues at the image's edges. */
525                "    v_IndexX = v_CenterX * u_ScreenSize.z - offsets.zwzw;"
526                "    v_IndexY = v_CenterY * u_ScreenSize.w - offsets.zwwz;"
527                "}",
528
529#   if !defined HAVE_GLES_2X
530                "#version 120\n"
531#   else
532                "precision highp float;"
533#   endif
534                ""
535                "uniform vec4 u_TexelSize;"
536                "uniform sampler2D u_Texture;"
537                ""
538                "varying vec4 v_CenterX, v_CenterY, v_IndexX, v_IndexY;"
539                ""
540                "void main(void)"
541                "{"
542                "    vec4 v05 = vec4(0.5, 0.5, 0.5, 0.5);"
543                "    vec4 rx, ry, t0, dx, dy, dd;"
544                     /* Get a pixel coordinate from each slice into rx & ry */
545                "    rx = u_TexelSize.x + u_TexelSize.z * floor(v_IndexX);"
546                "    ry = u_TexelSize.y + u_TexelSize.w * floor(v_IndexY);"
547                     /* Compute inverse distance to expected pixel in dd,
548                      * and put zero if we fall outside the texture. */
549                "    t0 = step(abs(rx - v05), v05) * step(abs(ry - v05), v05);"
550                "    dx = rx - v_CenterX;"
551                "    dy = ry - v_CenterY;"
552                //"    vec4 dd = t0 * (abs(dx) + abs(dy));"
553                //"    vec4 dd = t0 / (0.001 + sqrt((dx * dx) + (dy * dy)));"
554                "    dd = t0 / (0.000001 + (dx * dx) + (dy * dy));"
555                     /* Modify Y coordinate to select proper quarter. */
556                "    ry = ry * 0.25 + vec4(0.0, 0.25, 0.5, 0.75);"
557                ""
558#   if 1
559                "\n#if 0\n" /* XXX: disabled until we can autodetect i915 */
560                     /* t1.x <-- dd.x > dd.y */
561                     /* t1.y <-- dd.z > dd.w */
562                "    vec2 t1 = step(dd.xz, dd.yw);"
563                     /* ret.x <-- max(rx.x, rx.y) wrt. t1.x */
564                     /* ret.y <-- max(rx.z, rx.w) wrt. t1.y */
565                     /* ret.z <-- max(ry.x, ry.y) wrt. t1.x */
566                     /* ret.w <-- max(ry.z, ry.w) wrt. t1.y */
567                "    vec4 ret = mix(vec4(rx.xz, ry.xz),"
568                "                   vec4(rx.yw, ry.yw), t1.xyxy);"
569                     /* dd.x <-- max(dd.x, dd.y) */
570                     /* dd.z <-- max(dd.z, dd.w) */
571                "    dd.xy = mix(dd.xz, dd.yw, t1);"
572                     /* t2 <-- dd.x > dd.z */
573                "    float t2 = step(dd.x, dd.y);"
574                     /* ret.x <-- max(ret.x, ret.y); */
575                     /* ret.y <-- max(ret.z, ret.w); */
576                "    ret.xy = mix(ret.xz, ret.yw, t2);"
577                "\n#else\n"
578                     /* Fallback for i915 cards -- the trick to reduce the
579                      * number of operations is to compute both step(a,b)
580                      * and step(b,a) and hope that their sum is 1. This is
581                      * almost always the case, and when it isn't we can
582                      * afford to have a few wrong pixels. However, a real
583                      * problem is when panning the image, because half the
584                      * screen is likely to flicker. To avoid this problem,
585                      * we cheat a little (see m_translate comment above). */
586                "    vec4 t1 = step(dd.xzyw, dd.ywxz);"
587                "    vec4 ret = vec4(rx.xz, ry.xz) * t1.zwzw"
588                "             + vec4(rx.yw, ry.yw) * t1.xyxy;"
589                "    dd.xy = dd.xz * t1.zw + dd.yw * t1.xy;"
590                "    vec2 t2 = step(dd.xy, dd.yx);"
591                "    ret.xy = ret.xz * t2.yy + ret.yw * t2.xx;"
592                "\n#endif\n"
593                     /* Nearest neighbour */
594                "    gl_FragColor = texture2D(u_Texture, ret.xy);"
595#   else
596                     /* Alternate version: some kind of linear interpolation */
597                "    vec4 p0 = texture2D(u_Texture, vec2(rx.x, ry.x));"
598                "    vec4 p1 = texture2D(u_Texture, vec2(rx.y, ry.y));"
599                "    vec4 p2 = texture2D(u_Texture, vec2(rx.z, ry.z));"
600                "    vec4 p3 = texture2D(u_Texture, vec2(rx.w, ry.w));"
601                "    gl_FragColor = 1.0 / (dd.x + dd.y + dd.z + dd.w)"
602                "          * (dd.x * p0 + dd.y * p1 + dd.z * p2 + dd.w * p3);"
603#   endif
604                "}"
605#else
606                "void main(float4 a_Vertex : POSITION,"
607                "          float2 a_TexCoord : TEXCOORD0,"
608                "          uniform float4x4 u_ZoomSettings,"
609                "          uniform float4 u_TexelSize,"
610                "          uniform float4 u_ScreenSize,"
611                "          out float4 out_Position : POSITION0,"
612                "          out float4 v_CenterX : TEXCOORD0,"
613                "          out float4 v_CenterY : TEXCOORD1,"
614                "          out float4 v_IndexX : TEXCOORD2,"
615                "          out float4 v_IndexY : TEXCOORD3)"
616                "{"
617                "    out_Position = a_Vertex;"
618                "    float4 offsets = float4(0.5, -0.5, 0.015625, -0.015625);"
619                "    float4 zoomscale = float4(u_ZoomSettings[2][0],"
620                "                              u_ZoomSettings[2][1],"
621                "                              u_ZoomSettings[2][2],"
622                "                              u_ZoomSettings[2][3]);"
623                "    float4 zoomtx = float4(u_ZoomSettings[0][0],"
624                "                           u_ZoomSettings[0][1],"
625                "                           u_ZoomSettings[0][2],"
626                "                           u_ZoomSettings[0][3]);"
627                "    float4 zoomty = float4(u_ZoomSettings[1][0],"
628                "                           u_ZoomSettings[1][1],"
629                "                           u_ZoomSettings[1][2],"
630                "                           u_ZoomSettings[1][3]);"
631                "    v_CenterX = zoomscale * a_TexCoord.x + zoomtx"
632                "              + offsets.xyxy * u_TexelSize.x;"
633                "    v_CenterY = zoomscale * a_TexCoord.y - zoomty"
634                "              + offsets.xyyx * u_TexelSize.y;"
635                "    v_IndexX = v_CenterX * u_ScreenSize.z - offsets.zwzw;"
636                "    v_IndexY = v_CenterY * u_ScreenSize.w - offsets.zwwz;"
637                "}",
638
639                "void main(in float4 v_CenterX : TEXCOORD0,"
640                "          in float4 v_CenterY : TEXCOORD1,"
641                "          in float4 v_IndexX : TEXCOORD2,"
642                "          in float4 v_IndexY : TEXCOORD3,"
643                "          uniform float4 u_TexelSize,"
644                "          uniform sampler2D u_Texture,"
645                "          out float4 out_FragColor : COLOR)"
646                "{"
647                "    float4 v05 = float4(0.5, 0.5, 0.5, 0.5);"
648                "    float4 rx, ry, t0, dx, dy, dd;"
649                "    rx = u_TexelSize.x + u_TexelSize.z * floor(v_IndexX);"
650                "    ry = u_TexelSize.y + u_TexelSize.w * floor(v_IndexY);"
651                "    t0 = step(abs(rx - v05), v05) * step(abs(ry - v05), v05);"
652                "    dx = rx - v_CenterX;"
653                "    dy = ry - v_CenterY;"
654                "    dd = t0 / (0.000001 + (dx * dx) + (dy * dy));"
655                "    ry = ry * 0.25 + float4(0.0, 0.25, 0.5, 0.75);"
656                "    float2 t1 = step(dd.xz, dd.yw);"
657                "    float4 ret = lerp(float4(rx.xz, ry.xz),"
658                "                      float4(rx.yw, ry.yw), t1.xyxy);"
659                "    dd.xy = lerp(dd.xz, dd.yw, t1);"
660                "    float t2 = step(dd.x, dd.y);"
661                "    ret.xy = lerp(ret.xz, ret.yw, t2);"
662                "    out_FragColor = tex2D(u_Texture, ret.xy);"
663                "}"
664#endif
665            );
666#if !defined _XBOX && !defined USE_D3D9
667            m_vertexattrib = m_shader->GetAttribLocation("a_Vertex");
668            m_texattrib = m_shader->GetAttribLocation("a_TexCoord");
669#endif
670            m_texeluni = m_shader->GetUniformLocation("u_TexelSize");
671            m_screenuni = m_shader->GetUniformLocation("u_ScreenSize");
672            m_zoomuni = m_shader->GetUniformLocation("u_ZoomSettings");
673            m_ready = true;
674
675#if !defined __CELLOS_LV2__ && !defined __ANDROID__ && !defined _XBOX && !defined USE_D3D9
676            /* Method 1: store vertex buffer on the GPU memory */
677            glGenBuffers(1, &m_vbo);
678            glBindBuffer(GL_ARRAY_BUFFER, m_vbo);
679            glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices,
680                         GL_STATIC_DRAW);
681            glGenBuffers(1, &m_tbo);
682            glBindBuffer(GL_ARRAY_BUFFER, m_tbo);
683            glBufferData(GL_ARRAY_BUFFER, sizeof(texcoords), texcoords,
684                         GL_STATIC_DRAW);
685#elif !defined __CELLOS_LV2__ && !defined __ANDROID__ && !defined _XBOX && !defined USE_D3D9
686            /* Method 2: upload vertex information at each frame */
687#elif defined _XBOX || defined USE_D3D9
688            D3DVERTEXELEMENT9 const elements[] =
689            {
690                { 0, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
691                { 1, 0, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
692                D3DDECL_END()
693            };
694            g_d3ddevice->CreateVertexDeclaration(elements, &m_vdecl);
695
696            if (FAILED(g_d3ddevice->CreateVertexBuffer(sizeof(vertices), D3DUSAGE_WRITEONLY, NULL, D3DPOOL_MANAGED, &m_vbo, NULL)))
697                exit(0);
698            vec2 *tmp1;
699            if (FAILED(m_vbo->Lock(0, 0, (void **)&tmp1, 0)))
700                exit(0);
701            memcpy(tmp1, vertices, sizeof(vertices));
702            m_vbo->Unlock();
703
704            if (FAILED(g_d3ddevice->CreateVertexBuffer(sizeof(texcoords), D3DUSAGE_WRITEONLY, NULL, D3DPOOL_MANAGED, &m_tbo, NULL)))
705                exit(0);
706            vec2 *tmp2;
707            if (FAILED(m_tbo->Lock(0, 0, (void **)&tmp2, 0)))
708                exit(0);
709            memcpy(tmp2, texcoords, sizeof(texcoords));
710            m_tbo->Unlock();
711#else
712#endif
713
714            /* FIXME: this object never cleans up */
715        }
716
717#if defined _XBOX || defined USE_D3D9
718
719#else
720#   if !defined HAVE_GLES_2X
721        glEnable(GL_TEXTURE_2D);
722#   endif
723        glBindTexture(GL_TEXTURE_2D, m_texid);
724#endif
725
726        if (m_dirty[m_frame])
727        {
728            for (int i = 0; i < m_size.y; i += MAX_LINES * 2)
729                m_donequeue.Pop();
730
731            m_dirty[m_frame]--;
732
733#if defined _XBOX || defined USE_D3D9
734            D3DLOCKED_RECT rect;
735            m_tex->LockRect(0, &rect, NULL,
736                            D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE);
737            for (int j = 0; j < m_size.y * 2; j++)
738            {
739                u8vec4 *line = (u8vec4 *)rect.pBits + j * rect.Pitch / 4;
740                for (int i = 0; i < m_size.x / 2; i++)
741                    line[i] = m_pixels[m_size.x / 2 * j + i];
742            }
743            m_tex->UnlockRect(0);
744#elif defined __CELLOS_LV2__
745            /* glTexSubImage2D is extremely slow on the PS3, to the point
746             * that uploading the whole texture is 40 times faster. */
747            glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT,
748                         m_size.x / 2, m_size.y * 2, 0,
749                         TEXTURE_FORMAT, TEXTURE_TYPE, m_pixels);
750#else
751            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, m_frame * m_size.y / 2,
752                            m_size.x / 2, m_size.y / 2,
753                            TEXTURE_FORMAT, TEXTURE_TYPE,
754                            m_pixels + m_size.x * m_size.y / 4 * m_frame);
755#endif
756        }
757
758        m_shader->Bind();
759        m_shader->SetUniform(m_texeluni, m_texel_settings);
760        m_shader->SetUniform(m_screenuni, m_screen_settings);
761        m_shader->SetUniform(m_zoomuni, m_zoom_settings);
762#if defined _XBOX || defined USE_D3D9
763        g_d3ddevice->SetTexture(0, m_tex);
764        //g_d3ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_CW);
765        g_d3ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
766        g_d3ddevice->SetVertexDeclaration(m_vdecl);
767        g_d3ddevice->SetStreamSource(0, m_vbo, 0, sizeof(*vertices));
768        g_d3ddevice->SetStreamSource(1, m_tbo, 0, sizeof(*texcoords));
769#elif !defined __CELLOS_LV2__ && !defined __ANDROID__
770        glBindBuffer(GL_ARRAY_BUFFER, m_vbo);
771        glEnableVertexAttribArray(m_vertexattrib);
772        glVertexAttribPointer(m_vertexattrib, 2, GL_FLOAT, GL_FALSE, 0, 0);
773
774        glBindBuffer(GL_ARRAY_BUFFER, m_tbo);
775        glEnableVertexAttribArray(m_texattrib);
776        glVertexAttribPointer(m_texattrib, 2, GL_FLOAT, GL_FALSE, 0, 0);
777#elif !defined __CELLOS_LV2__ && !defined __ANDROID__
778        /* Never used for now */
779        //glEnableVertexAttribArray(m_vertexattrib);
780        //glVertexAttribPointer(m_vertexattrib, 2, GL_FLOAT, GL_FALSE, 0, vertices);
781#else
782        glEnableClientState(GL_VERTEX_ARRAY);
783        glVertexPointer(2, GL_FLOAT, 0, vertices);
784        glEnableClientState(GL_TEXTURE_COORD_ARRAY);
785        glTexCoordPointer(2, GL_FLOAT, 0, texcoords);
786#endif
787
788#if defined _XBOX || defined USE_D3D9
789        /* FIXME: what the fuck? Why does "2" not work here instead of 3? */
790        g_d3ddevice->DrawPrimitive(D3DPT_TRIANGLELIST, 0, 3);
791#else
792        glDrawArrays(GL_TRIANGLES, 0, 6);
793#endif
794
795#if defined _XBOX || defined USE_D3D9
796
797#elif !defined __CELLOS_LV2__ && !defined __ANDROID__
798        glDisableVertexAttribArray(m_vertexattrib);
799        glDisableVertexAttribArray(m_texattrib);
800        glBindBuffer(GL_ARRAY_BUFFER, 0);
801#elif !defined __CELLOS_LV2__ && !defined __ANDROID__
802        /* Never used for now */
803        //glDisableVertexAttribArray(m_vertexattrib);
804        //glDisableVertexAttribArray(m_texattrib);
805#else
806        glDisableClientState(GL_VERTEX_ARRAY);
807        glDisableClientState(GL_TEXTURE_COORD_ARRAY);
808#endif
809    }
810
811private:
812    static int const MAX_ITERATIONS = 340;
813    static int const PALETTE_STEP = 32;
814    static int const MAX_THREADS = 8;
815    static int const MAX_LINES = 8;
816
817    ivec2 m_size, m_window_size, m_oldmouse;
818    double m_window2world;
819    f64vec2 m_texel2world;
820    u8vec4 *m_pixels, *m_tmppixels, *m_palette;
821    Shader *m_shader;
822#if defined USE_D3D9
823    IDirect3DTexture9 *m_tex;
824    IDirect3DVertexDeclaration9 *m_vdecl;
825    IDirect3DVertexBuffer9 *m_vbo, *m_tbo;
826#elif defined _XBOX
827    D3DTexture *m_tex;
828    D3DVertexDeclaration *m_vdecl;
829    D3DVertexBuffer *m_vbo, *m_tbo;
830#else
831    GLuint m_texid;
832#   if !defined __CELLOS_LV2__ && !defined __ANDROID__
833    GLuint m_vbo, m_tbo;
834    GLuint m_tco;
835#   endif
836#endif
837    int m_vertexattrib, m_texattrib;
838    ShaderUniform m_texeluni, m_screenuni, m_zoomuni;
839    int m_frame, m_slices, m_dirty[4];
840    bool m_ready, m_drag;
841
842    f64cmplx m_center, m_translate;
843    double m_zoom_speed, m_radius;
844    vec4 m_texel_settings, m_screen_settings;
845    mat4 m_zoom_settings;
846    f64cmplx m_deltashift[4];
847    double m_deltascale[4];
848
849    /* Worker threads */
850    Thread *m_threads[MAX_THREADS];
851    Queue<int> m_spawnqueue, m_jobqueue, m_donequeue;
852
853    /* Debug information */
854#if !defined __native_client__
855    Text *m_centertext, *m_mousetext, *m_zoomtext;
856#endif
857};
858
859int main(int argc, char **argv)
860{
861    Application app("Tutorial 3: Fractal", ivec2(640, 480), 60.0f);
862
863#if defined _MSC_VER
864    _chdir("..");
865#elif defined _WIN32
866    _chdir("../..");
867#endif
868
869    new DebugFps(5, 5);
870    new Fractal(ivec2(640, 480));
871    //new DebugRecord("fractalol.ogm", 60.0f);
872
873    app.Run();
874
875    return EXIT_SUCCESS;
876}
877
Note: See TracBrowser for help on using the repository browser.