Apfelmännchen mit C++11 std::thread

Die Berechnung der Bildzeilen wurde einzelnen Threads in einem Threadpool übertragen.

//: mandelbrot.cpp : Fraktalbild - R.Richter 2014-05-02
///////////////////////////////////////////////////////
#include <cmath>
#include <complex>
#include <ctime>
#include <iostream>
#include <string>
#include "image.h"
#include "progsch/ThreadPool.h"
 
using complex = std::complex<double>;

// Escape-time iteration for one point c of the complex plane.
//
// Starting from z = 0, applies z = z*z + c repeatedly and returns the number
// of steps performed. Iteration stops as soon as |z| is no longer <= 2 or
// once maxIterations steps have been taken, whichever comes first.
int compute(complex c, int maxIterations)
{
  complex z;  // std::complex default-constructs to 0 + 0i
  int steps = 0;

  for (; steps < maxIterations; ++steps)
  {
    // Written as a negated "<=" so that a NaN magnitude also ends the loop.
    if (!(abs(z) <= 2.0)) break;
    z = z * z + c;
  }
  return steps;
}
 
// Maps an iteration count to a pixel colour.
//
// height: iteration count of the point (as returned by compute()).
// max:    iteration limit that was used for the computation.
//
// Counts that reached the limit are drawn in Color::BLACK. All other counts
// are placed on a logarithmic brightness ramp h in [0, 255) and tinted with
// the factors 0.9 / 0.8 / 0.6.
inline 
Color color(int height, int max)
{
  // color scheme from: 
  // http://shreyassiravara.wordpress.com/2010/08/14/the-mandelbrot-set/
  if (height >= max) return Color::BLACK;

  // log(1) == 0 gives h == 0 anyway; counts below 1 would yield -inf or NaN
  // from the logarithm, so they are pinned to the same darkest ramp value.
  if (height <= 1) return Color(0.0, 0.0, 0.0);

  double h = 255 * std::log(double(height)) / std::log(double(max));
  return Color(0.9 * h, 0.8 * h, 0.6 * h);
}
 
// Linearly maps a pixel index onto a coordinate interval.
//
// pos:    pixel index; 0 maps to low and length-1 maps to high.
// length: number of pixels along the axis.
// low:    coordinate of the first pixel.
// high:   coordinate of the last pixel.
//
// Returns low + pos * (high-low) / (length-1). A single-pixel axis has no
// extent to interpolate over, so it maps to low.
inline
double scale(int pos, int length, double low, double high)
{
  // length-1 would be zero here and the division would produce NaN/inf.
  if (length == 1) return low;
  return low + pos * (high-low) / (length-1);
}
 
Image mandelbrot(int width, int height, 
                 int maxIterations,
                 complex left_bottom, complex right_top)
{
  int const numThreads = 16;
  ThreadPool pool(numThreads);
  std::vector<std::future<std::vector<Color>>> lines;
 
  for (int y = 0; y < height; ++y)
  {
    lines.push_back(
      pool.enqueue(
        [y, width, height, maxIterations, left_bottom, right_top] 
        {
          std::vector<Color> line(width);
 
          for (int x = 0; x < width; ++x)
          {
            complex c(scale(x, width,  real(left_bottom), real(right_top)),
                      scale(y, height, imag(left_bottom), imag(right_top)));
 
            int iterations = compute(c, maxIterations);
            line[x] = color(iterations, maxIterations);   
          }
          // std::cout << ("line " + std::to_string(y) + "\n");
          return line;
        }
      )			
    );
  }	
 
  Image image(width, height);
  for (int y = 0; y < height; ++y)
  {
    auto line = lines[y].get();
    for (int x = 0; x < width; ++x)
    {
      image.pixel(x, y) = line[x];   
    }
  }
  return image;
}
 
int main()
{
  int width  = 1000;
  int height = 1000;
  int maxIterations = 10000;
  complex left_bottom(-2.0, -2.0);
  complex right_top  ( 2.0,  2.0);
 
  saveBMP("mandel.bmp", 
          mandelbrot(width, height, maxIterations, left_bottom, right_top));
 
  std::cout << clock() / double(CLOCKS_PER_SEC) << " seconds CPU time\n";        
  return 0;
}

Übersetzung, Ausführung

Das Programm wurde mit g++ 4.9.0, Option -O3 -std=c++11 für Linux auf einem Udoo Quad und mit TDM MinGW g++ 4.8.1 auf Windows übersetzt. Die Zahl der gleichzeitig ausführenden Threads (numThreads) wurde dabei in Zweierpotenzen von 1 bis 16 variiert.

Zur Ausführung kam das Programm auf einem Udoo Quad, einem Dual-Core-Rechner unter Windows 7 und einem Quad-Core-Rechner mit Hyperthreading unter Windows 7.

Zeitmessung

Die Ausführungszeit sinkt auf einen Minimalwert, wenn die Zahl der Threads mit der Zahl der Prozessoren übereinstimmt. Unter Linux bleibt die Berechnungszeit auch dann bei diesem Minimum, wenn die Zahl der im Programm angegebenen Threads höher ist als die tatsächliche Anzahl vorhandener Prozessoren. Unter Windows ist die Angabe einer zu hohen Threadanzahl schädlich. Dieser Effekt scheint an der von TDM MinGW g++ genutzten winpthreads-Bibliothek zu liegen. Bei Übersetzung und Messungen unter Microsoft Visual Studio 2013 Express verschwand dieser Effekt. Allerdings lief das im Release-Mode (Standardeinstellungen, Optimierungsstufe /O2) erzeugte Programm generell ca. 3x bis 4x langsamer als das mit dem 64bit-GNU-Compiler (Optimierungsstufe -O3) erzeugte Kompilat. Dies war das eigentlich Überraschende an dieser Messung. Einen Faktor 2 macht wohl der Unterschied zwischen 32bit- und 64bit-Anwendungen aus… Nun, woher kommt der Rest?