mightymandel v16

GPU-based Mandelbrot set explorer

fpxx_approx.c
Go to the documentation of this file.
1 // mightymandel -- GPU-based Mandelbrot Set explorer
2 // Copyright (C) 2012,2013,2014,2015 Claude Heiland-Allen
3 // License GPL3+ http://www.gnu.org/licenses/gpl.html
4 
5 #include <math.h>
6 #include <stdbool.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 
10 #include "fpxx_approx.h"
11 #include "shader.h"
12 #include "logging.h"
13 #include "utility.h"
14 #include "mightymandel.h"
15 #include "texture.h"
16 
17 extern const char *fpxx_approx_vert;
18 const GLchar *fpxx_approx_varyings[] = {"cne", "zdz", "err"};
19 
20 void fpxx_approx_begin(struct fpxx_approx *s) {
21  s->program = compile_program_tf("fpxx_approx", fpxx_approx_vert, 0, 0, 3, fpxx_approx_varyings);
22  s->iters = glGetUniformLocation(s->program, "iters");D;
23  s->c0 = glGetAttribLocation(s->program, "c0");D;
24  glGenVertexArrays(1, &s->vao);
25  mpfr_inits2(53
26  , s->p.x, s->p.y, s->z.x, s->z.y, s->z2.x, s->z2.y, s->delta4
27  , s->a.x, s->a.y, s->b.x, s->b.y, s->c.x, s->c.y
28  , s->u.x, s->u.y, s->v.x, s->v.y, s->w.x, s->w.y
29  , s->a2.x, s->a2.y, s->b2.x, s->b2.y, s->c2.x, s->c2.y
30  , s->u2.x, s->u2.y, s->v2.x, s->v2.y, s->w2.x, s->w2.y
31  , s->t1.x, s->t1.y, s->t2.x, s->t2.y
32  , s->t3.x, s->t3.y, s->t4.x, s->t4.y
33  , s->t5.x, s->t5.y, s->t6.x, s->t6.y
34  , (mpfr_ptr) 0);
35 }
36 
37 void fpxx_approx_end(struct fpxx_approx *s) {
38  glDeleteVertexArrays(1, &s->vao);
39  glDeleteProgram(s->program);D;
40  mpfr_clears
41  ( s->p.x, s->p.y, s->z.x, s->z.y, s->z2.x, s->z2.y, s->delta4
42  , s->a.x, s->a.y, s->b.x, s->b.y, s->c.x, s->c.y
43  , s->u.x, s->u.y, s->v.x, s->v.y, s->w.x, s->w.y
44  , s->a2.x, s->a2.y, s->b2.x, s->b2.y, s->c2.x, s->c2.y
45  , s->u2.x, s->u2.y, s->v2.x, s->v2.y, s->w2.x, s->w2.y
46  , s->t1.x, s->t1.y, s->t2.x, s->t2.y
47  , s->t3.x, s->t3.y, s->t4.x, s->t4.y
48  , s->t5.x, s->t5.y, s->t6.x, s->t6.y
49  , (mpfr_ptr) 0);
50 }
51 
52 void fpxx_approx_do(struct fpxx_approx *s, GLuint *active_count, GLuint *vbo, GLuint query, mpfr_t zx, mpfr_t zy, mpfr_t dzx, mpfr_t dzy, int pass, const mpfr_t radius, const mpfr_t refx, const mpfr_t refy, bool series_approx, bool initial_slice, void *abort_data, abort_t abort_fn) {
53  if (initial_slice) {
54  s->too_deep = false;
55  mpfr_sqr(s->delta4, radius, MPFR_RNDN);
56  mpfr_sqr(s->delta4, s->delta4, MPFR_RNDN);
57  mpfr_prec_t p = mpfr_get_prec(refx);
58  c_set_prec(s->p, p);
59  c_set_prec(s->z, p);
60  c_set_prec(s->a, p);
61  c_set_prec(s->b, p);
62  c_set_prec(s->c, p);
63  c_set_prec(s->u, p);
64  c_set_prec(s->v, p);
65  c_set_prec(s->w, p);
66  c_set_prec(s->z2, p);
67  c_set_prec(s->a2, p);
68  c_set_prec(s->b2, p);
69  c_set_prec(s->c2, p);
70  c_set_prec(s->u2, p);
71  c_set_prec(s->v2, p);
72  c_set_prec(s->w2, p);
73  c_set_prec(s->t1, p);
74  c_set_prec(s->t2, p);
75  c_set_prec(s->t3, p);
76  c_set_prec(s->t4, p);
77  c_set_prec(s->t5, p);
78  c_set_prec(s->t6, p);
79  mpfr_set(s->p.x, refx, MPFR_RNDN); mpfr_set(s->p.y, refy, MPFR_RNDN);
80  mpfr_set(s->z.x, refx, MPFR_RNDN); mpfr_set(s->z.y, refy, MPFR_RNDN);
81  mpfr_set_ui(s->a.x, 1, MPFR_RNDN); mpfr_set_ui(s->a.y, 0, MPFR_RNDN);
82  mpfr_set_ui(s->b.x, 0, MPFR_RNDN); mpfr_set_ui(s->b.y, 0, MPFR_RNDN);
83  mpfr_set_ui(s->c.x, 0, MPFR_RNDN); mpfr_set_ui(s->c.y, 0, MPFR_RNDN);
84  if (DE) {
85  mpfr_set_ui(s->u.x, 0, MPFR_RNDN); mpfr_set_ui(s->u.y, 0, MPFR_RNDN);
86  mpfr_set_ui(s->v.x, 0, MPFR_RNDN); mpfr_set_ui(s->v.y, 0, MPFR_RNDN);
87  mpfr_set_ui(s->w.x, 0, MPFR_RNDN); mpfr_set_ui(s->w.y, 0, MPFR_RNDN);
88  }
89  unsigned int n = 1;
90  bool accurate = true;
91  while (accurate) {
92  if (abort_fn(abort_data)) {
93  return;
94  }
95  s->values[0][0] = mpfr_get_d(s->a.x, MPFR_RNDN);
96  s->values[0][1] = mpfr_get_d(s->a.y, MPFR_RNDN);
97  s->values[1][0] = mpfr_get_d(s->b.x, MPFR_RNDN);
98  s->values[1][1] = mpfr_get_d(s->b.y, MPFR_RNDN);
99  s->values[2][0] = mpfr_get_d(s->c.x, MPFR_RNDN);
100  s->values[2][1] = mpfr_get_d(s->c.y, MPFR_RNDN);
101  if (DE) {
102  s->values[3][0] = mpfr_get_d(s->u.x, MPFR_RNDN);
103  s->values[3][1] = mpfr_get_d(s->u.y, MPFR_RNDN);
104  s->values[4][0] = mpfr_get_d(s->v.x, MPFR_RNDN);
105  s->values[4][1] = mpfr_get_d(s->v.y, MPFR_RNDN);
106  s->values[5][0] = mpfr_get_d(s->w.x, MPFR_RNDN);
107  s->values[5][1] = mpfr_get_d(s->w.y, MPFR_RNDN);
108  }
109  if (! series_approx) {
110  break;
111  }
112  n += 1;
113  if (DE) {
114  // w <- 2 (a c + z w + a v + b u)
115  c_mul(s->w2, s->z, s->w, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
116  c_mul(s->t1, s->a, s->c, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
117  c_add(s->w2, s->w2, s->t1);
118  c_mul(s->t1, s->a, s->v, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
119  c_add(s->w2, s->w2, s->t1);
120  c_mul(s->t1, s->b, s->u, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
121  c_add(s->w2, s->w2, s->t1);
122  c_mul_2ui(s->w, s->w, 1);
123  // v <- 2 (a b + z v + a u)
124  c_mul(s->v2, s->z, s->v, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
125  c_mul(s->t2, s->a, s->u, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
126  c_add(s->v2, s->v2, s->t2);
127  c_mul(s->t2, s->a, s->b, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
128  c_add(s->v2, s->v2, s->t2);
129  c_mul_2ui(s->v2, s->v2, 1);
130  // u <- 2 (a a + z u)
131  c_mul(s->u2, s->z, s->u, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
132  c_sqr(s->t1, s->a, s->t3.y, s->t4.x, s->t4.y);
133  c_add(s->u2, s->u2, s->t1);
134  c_mul_2ui(s->u2, s->u, 1);
135  } else {
136  // a b
137  c_mul(s->t2, s->a, s->b, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
138  // a a
139  c_sqr(s->t1, s->a, s->t3.y, s->t4.x, s->t4.y);
140  }
141  // c <- 2 (a b + z c)
142  c_mul(s->c2, s->z, s->c, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
143  c_add(s->c2, s->t2, s->c2);
144  c_mul_2ui(s->c2, s->c2, 1);
145  // b <- 2 z b + a a
146  c_mul(s->b2, s->z, s->b, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
147  c_mul_2ui(s->b2, s->b2, 1);
148  c_add(s->b2, s->t1, s->b2);
149  // a2 <- 2 z a + 1
150  c_mul(s->a2, s->z, s->a, s->t3.x, s->t3.y, s->t4.x, s->t4.y);
151  c_mul_2ui(s->a2, s->a2, 1);
152  mpfr_add_ui(s->a2.x, s->a2.x, 1, MPFR_RNDN);
153  // z2 <- z z + p
154  c_sqr(s->z2, s->z, s->t5.y, s->t6.x, s->t6.y);
155  c_add(s->z2, s->z2, s->p);
156  // accurate <- |c|^2 d^4 << |a|^2 && |w|^2 d^4 << |u|^2
157  c_mag2(s->t1.x, s->c2, s->t4.x, s->t4.y);
158  c_mag2(s->t1.y, s->a2, s->t4.x, s->t4.y);
159  mpfr_div(s->t1.x, s->t1.x, s->t1.y, MPFR_RNDN);
160  mpfr_mul(s->t1.x, s->t1.x, s->delta4, MPFR_RNDN);
161  if (DE) {
162  c_mag2(s->t2.x, s->w2, s->t4.x, s->t4.y);
163  c_mag2(s->t2.y, s->u2, s->t4.x, s->t4.y);
164  mpfr_div(s->t2.x, s->t2.x, s->t2.y, MPFR_RNDN);
165  mpfr_mul(s->t2.x, s->t2.x, s->delta4, MPFR_RNDN);
166  }
167  c_mag2(s->t4.y, s->z2, s->t3.x, s->t3.y);
168  const double eps = 1e-40;
169  accurate = mpfr_get_d(s->t1.x, MPFR_RNDN) < eps && ! (mpfr_get_d(s->t4.y, MPFR_RNDN) > 5); // && (DE ? mpfr_get_d(s->t2.x, MPFR_RNDN) < eps : 1);
170  if (accurate) {
171  c_set(s->z, s->z2);
172  c_set(s->a, s->a2);
173  c_set(s->b, s->b2);
174  c_set(s->c, s->c2);
175  if (DE) {
176  c_set(s->u, s->u2);
177  c_set(s->v, s->v2);
178  c_set(s->w, s->w2);
179  }
180  }
181  }
182  n -= 1;
183  s->n = n;
184  log_message(LOG_INFO, "approx skip: %d\n", n);
185  }
186  bool ok = true;
187  for (int i = 0; i < (DE ? 6 : 3); ++i) {
188  for (int j = 0; j < 2; ++j) {
189  if (fabs(s->values[i][j]) > 1.0e144 || isnan(s->values[i][j])) {
190  ok = false;
191  }
192  }
193  }
194  mpfr_set_prec(dzx, mpfr_get_prec(s->a.x));
195  mpfr_set_prec(dzy, mpfr_get_prec(s->a.y));
196  mpfr_set_prec(zx, mpfr_get_prec(s->z.x));
197  mpfr_set_prec(zy, mpfr_get_prec(s->z.y));
198  mpfr_set(dzx, s->a.x, MPFR_RNDN);
199  mpfr_set(dzy, s->a.y, MPFR_RNDN);
200  mpfr_set(zx, s->z.x, MPFR_RNDN);
201  mpfr_set(zy, s->z.y, MPFR_RNDN);
202  if (! (*active_count > 0)) {
203  return;
204  }
205  if (ok) {
206  debug_message("approx GPU\n");
207  glEnable(GL_RASTERIZER_DISCARD);D;
208  glBindVertexArray(s->vao);
209  glBindBuffer(GL_ARRAY_BUFFER, vbo[1]);D;
210  glUseProgram(s->program);D;
211  glUniform2dv(s->abcuvw, DE ? 6 : 3, &s->values[0][0]);D;
212  glUniform1i(s->iters, s->n);D;
213  glVertexAttribLPointer(s->c0, 2, GL_DOUBLE, (DE ? 9 : 7) * sizeof(GLdouble), 0);D;
214  glEnableVertexAttribArray(s->c0);D;
215  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbo[0]);D;
216  glBeginQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, query);D;
217  glBeginTransformFeedback(GL_POINTS);D;
218  glDrawArrays(GL_POINTS, 0, *active_count);D;
219  glEndTransformFeedback();D;
220  glEndQuery(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN);D;
221  int before = *active_count;
222  glGetQueryObjectuiv(query, GL_QUERY_RESULT, active_count);D;
223  int after = *active_count;
224  debug_message("approx active_count: %d -> %d\n", before, after);
225  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);D;
226  glDisableVertexAttribArray(s->c0);D;
227  glUseProgram(0);D;
228  glBindBuffer(GL_ARRAY_BUFFER, 0);D;
229  glBindVertexArray(0);
230  glDisable(GL_RASTERIZER_DISCARD);D;
231  debug_message("VBO approx: %d -> %d\n", vbo[1], vbo[0]);
232  } else {
233  debug_message("approx CPU (overflow risk)\n");
234  debug_message("VBO approx: %d -> %d\n", vbo[1], vbo[0]);
235  glBindBuffer(GL_ARRAY_BUFFER, vbo[1]);D;
236  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbo[0]);D;
237  const double *in = glMapBufferRange(GL_ARRAY_BUFFER, 0, *active_count * (DE ? 9 : 7) * sizeof(GLdouble), GL_MAP_READ_BIT);D;
238  bool ok;
239  if (in) {
240  const double *q = in;
241  if (pass == 0 && ! (q[0] != 0) && ! (q[1] != 0)) {
242  log_message(LOG_WARN, "approx delta(z)_0 == 0, rendering will probably fail\n");
243  s->too_deep = true;
244  }
245  double *out = glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, *active_count * (DE ? 9 : 7) * sizeof(GLdouble), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);D;
246  if (out) {
247  double *p = out;
248  // initialize variables
249  C c, c2, c3, z, dz, t;
250  R t0, t1, t2, t3;
251 #define VARS c.x, c.y, c2.x, c2.y, c3.x, c3.y, z.x, z.y, dz.x, dz.y, t.x, t.y, t0, t1, t2, t3
252  mpfr_inits2(mpfr_get_prec(refx), VARS, (mpfr_ptr) 0);
253  for (unsigned int k = 0; k < *active_count; ++k) {
254  mpfr_set_d(c.x, q[0], MPFR_RNDN);
255  mpfr_set_d(c.y, q[1], MPFR_RNDN);
256  q += (DE ? 9 : 7);
257  *p++ = mpfr_get_d(c.x, MPFR_RNDN);
258  *p++ = mpfr_get_d(c.y, MPFR_RNDN);
259  *p++ = s->n;
260  *p++ = 0; // escaped flag
261  // approximation cubic: z = A c + B c c + C c c c
262  c_mul(z, s->a, c, t0, t1, t2, t3);
263  c_sqr(c2, c, t0, t1, t3);
264  c_mul(t, s->b, c2, t0, t1, t2, t3);
265  c_add(z, z, t);
266  c_mul(c3, c, c2, t0, t1, t2, t3);
267  c_mul(t, s->c, c3, t0, t1, t2, t3);
268  c_add(z, z, t);
269  *p++ = mpfr_get_d(z.x, MPFR_RNDN);
270  *p++ = mpfr_get_d(z.y, MPFR_RNDN);
271  if (DE) {
272  // approximation cubic: z = U c + U c c + U c c c
273  c_mul(dz, s->u, c, t0, t1, t2, t3);
274  c_mul(t, s->v, c2, t0, t1, t2, t3);
275  c_add(dz, dz, t);
276  c_mul(t, s->w, c3, t0, t1, t2, t3);
277  c_add(dz, dz, t);
278  *p++ = mpfr_get_d(dz.x, MPFR_RNDN);
279  *p++ = mpfr_get_d(dz.y, MPFR_RNDN);
280  }
281  *p++ = 0; // error flag
282  }
283  if (DE) {
284  debug_message("approx out: c %e %e z %e %e dz %e %e\n", out[0], out[1], out[4], out[5], out[6], out[7]);
285  } else {
286  debug_message("approx out: c %e %e z %e %e\n", out[0], out[1], out[4], out[5]);
287  }
288  mpfr_clears(VARS, (mpfr_ptr) 0);
289 #undef VARS
290  out = 0;
291  p = 0;
292  ok = glUnmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER);D;
293  glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, 0);D;
294  } else {
295  ok = false;
296  }
297  in = 0;
298  q = 0;
299  ok &= glUnmapBuffer(GL_ARRAY_BUFFER);D;
300  glBindBuffer(GL_ARRAY_BUFFER, 0);D;
301  } else {
302  ok = false;
303  }
304  if (! ok) {
305  log_message(LOG_ERROR, "CPU-based series approximation initialisation failed\n");
306  }
307  }
308 }