From 292dba06f5e7b42af43a2186a462df72d4cf2808 Mon Sep 17 00:00:00 2001
From: Nick Gasson <nick@nickg.me.uk>
Date: Sat, 15 Jan 2011 09:54:29 +0000
Subject: [PATCH] Start trying to optimise vector code

---
 CMakeLists.txt      |  9 ++++--
 include/Maths.hpp   | 73 ++++++++++++++++++++++++++++-----------------
 src/Mesh.cpp        |  6 +---
 tools/MathsTest.cpp | 53 ++++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 34 deletions(-)
 create mode 100644 tools/MathsTest.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13b437c..e147078 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,8 +44,10 @@ include_directories (include ${CMAKE_CURRENT_BINARY_DIR})
 if (NOT WIN32) # Unix
   set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
   # "-Wconversion -Werror"
-endif (NOT WIN32)
-
+  if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -mfpmath=sse")
+  endif ()
+endif ()
 
 # WIN32 makes a non-console application on Windows
 add_executable (${PROJECT_NAME} WIN32 ${folder_source})
@@ -54,6 +56,9 @@ target_link_libraries (${PROJECT_NAME} ${SDL_LIBRARY} ${SDLIMAGE_LIBRARY}
   ${OPENGL_LIBRARY} ${XERCES_LIBRARIES} ${Boost_LIBRARIES}
   ${FREETYPE_LIBRARIES} ${GLEW_LIBRARY})
 
+# Test tool
+add_executable (MathsTest EXCLUDE_FROM_ALL tools/MathsTest.cpp)
+
 # Profiling
 if (PROFILE)
   set_target_properties (${PROJECT_NAME} PROPERTIES LINK_FLAGS -pg)
diff --git a/include/Maths.hpp b/include/Maths.hpp
index c185132..0d425ba 100644
--- a/include/Maths.hpp
+++ b/include/Maths.hpp
@@ -1,5 +1,5 @@
 //
-//  Copyright (C) 2009-2010  Nick Gasson
+//  Copyright (C) 2009-2011  Nick Gasson
 //
 //  This program is free software: you can redistribute it and/or modify
 //  it under the terms of the GNU General Public License as published by
@@ -24,14 +24,31 @@
 #include <ostream>
 #include <cassert>
 
+#if 0
+template <typename T, int N>
+union Packed;
+
+template <>
+union Packed<float, 4> {
+   int __attribute__((mode(V4SF))) packed;
+   float unpacked[4];
+};
+
+template <>
+union Packed<float, 3> {
+   int __attribute__((mode(V3SF))) packed;
+   float unpacked[3];
+};
+#endif
+
 // A generic 3D vector
 template <typename T>
 struct Vector {
-   Vector(T x, T y, T z) : x(x), y(y), z(z) {}
-   Vector() : x(0), y(0), z(0) {}
+   inline Vector(T x, T y, T z) : x(x), y(y), z(z) {}
+   inline Vector() : x(0), y(0), z(0) {}
 
    // Cross product
-   Vector<T> operator*(const Vector<T>& v) const
+   inline Vector<T> operator*(const Vector<T>& v) const
    {
       return Vector<T>(
          y*v.z - z*v.y,
@@ -40,30 +57,30 @@ struct Vector {
    }
 
    // Multiply by a scalar
-   Vector<T> operator*(T t) const
+   inline Vector<T> operator*(T t) const
    {
       return Vector<T>(x*t, y*t, z*t);
    }
 
    // Divide by a scalar
-   Vector<T> operator/(T t) const
+   inline Vector<T> operator/(T t) const
    {
       return Vector<T>(x/t, y/t, z/t);
    }
 
    // Scalar product
-   T dot(const Vector<T>&v) const
+   inline T dot(const Vector<T>&v) const
    {
       return x*v.x + y*v.y + z*v.z;
    }
 
    // Magnitude
-   T length() const
+   inline T length() const
    {
-      return static_cast<T>(sqrt(static_cast<double>(x*x + y*y + z*z)));
+      return sqrt(x*x + y*y + z*z);
    }
 
-   Vector<T>& normalise()
+   inline Vector<T>& normalise()
    {
       T m = length();
       x /= m;
@@ -72,12 +89,12 @@ struct Vector {
       return *this;
    }
 
-   Vector<T> operator+(const Vector<T>& v) const
+   inline Vector<T> operator+(const Vector<T>& v) const
    {
       return Vector<T>(x+v.x, y+v.y, z+v.z);
    }
 
-   Vector<T> operator+=(const Vector<T>& v)
+   inline Vector<T>& operator+=(const Vector<T>& v)
    {
       x += v.x;
       y += v.y;
@@ -85,17 +102,17 @@ struct Vector {
       return *this;
    }
    
-   Vector<T> operator-(const Vector<T>& v) const
+   inline Vector<T> operator-(const Vector<T>& v) const
    {
       return Vector<T>(x-v.x, y-v.y, z-v.z);
    }
 
-   Vector<T> operator-() const
+   inline Vector<T> operator-() const
    {
       return Vector<T>(-x, -y, -z);
    }
 
-   Vector<T> operator-=(const Vector<T>& v)
+   inline Vector<T>& operator-=(const Vector<T>& v)
    {
       x -= v.x;
       y -= v.y;
@@ -103,17 +120,17 @@ struct Vector {
       return *this;
    }
    
-   bool operator==(const Vector<T>& v) const
+   inline bool operator==(const Vector<T>& v) const
    {
       return x == v.x && y == v.y && z == v.z;
    }
 
-   bool operator!=(const Vector<T>& v) const
+   inline bool operator!=(const Vector<T>& v) const
    {
       return !(v == *this);
    }
 
-   bool operator<(const Vector<T>& rhs) const
+   inline bool operator<(const Vector<T>& rhs) const
    {
       return x < rhs.x
          || (x == rhs.x
@@ -132,25 +149,27 @@ struct Vector {
 };
 
 template <typename T>
-std::ostream& operator<<(std::ostream& a_stream, const Vector<T>& a_vector)
+std::ostream& operator<<(std::ostream& s, const Vector<T>& v)
 {
-   return a_stream << "[" << a_vector.x << " " << a_vector.y
-                  << " " << a_vector.z << "]";
+   return s << "[" << v.x << " " << v.y
+            << " " << v.z << "]";
 }
 
 template <typename T>
-Vector<T> make_vector(T x, T y, T z)
+inline Vector<T> make_vector(T x, T y, T z)
 {
    return Vector<T>(x, y, z);
 }
 
+typedef Vector<float> VectorF;
+
 // Find a surface normal
 template <typename T>
 Vector<T> surface_normal(const Vector<T>& a, const Vector<T>& b,
    const Vector<T>& c)
 {
-   Vector<T> v1 = b - a;
-   Vector<T> v2 = c - a;
+   const Vector<T> v1 = b - a;
+   const Vector<T> v2 = c - a;
    Vector<T> n = v1 * v2;
    n.normalise();
    return n;
@@ -158,7 +177,7 @@ Vector<T> surface_normal(const Vector<T>& a, const Vector<T>& b,
 
 // Useful debugging function
 void draw_normal(const Vector<float>& a_position,
-   const Vector<float>& a_normal);
+                 const Vector<float>& a_normal);
 
 // A 2D point in space
 template <typename T>
@@ -258,13 +277,13 @@ float approx_gradient(function<float (float)> a_func, float x);
 template <typename T>
 inline float deg_to_rad(T t)
 {
-   return static_cast<float>(t) * M_PI / 180.0;
+   return float(t) * M_PI / 180.0f;
 }
 
 template <typename T>
 inline T rad_to_deg(float r)
 {
-   return static_cast<T>(r * 180.0 / M_PI);
+   return T(r * 180.0f / M_PI);
 }
 
 #endif
diff --git a/src/Mesh.cpp b/src/Mesh.cpp
index 0654f43..da4d854 100644
--- a/src/Mesh.cpp
+++ b/src/Mesh.cpp
@@ -67,11 +67,7 @@ struct MeshBuffer : IMeshBuffer {
 
    static bool merge_vector(const Vector<float>& v1, const Vector<float>& v2)
    {
-      const float tolerance = 0.001f;
-      
-      return abs(v1.x - v2.x) < tolerance
-	 && abs(v1.y - v2.y) < tolerance
-	 && abs(v1.z - v2.z) < tolerance;
+      return v1.approx_equal(v2, 0.001f);
    }
    
    vector<Vertex> vertices;
diff --git a/tools/MathsTest.cpp b/tools/MathsTest.cpp
new file mode 100644
index 0000000..aea11a6
--- /dev/null
+++ b/tools/MathsTest.cpp
@@ -0,0 +1,53 @@
+#include "Maths.hpp"
+
+#include <iostream>
+
+/*
+
+  Baseline (arguments passed by value):
+  
+  0000000000400bc0 <vfadd>:
+  400bc0:       66 0f d6 44 24 d8       movq   %xmm0,-0x28(%rsp)
+  400bc6:       f3 0f 58 cb             addss  %xmm3,%xmm1
+  400bca:       66 0f d6 54 24 c8       movq   %xmm2,-0x38(%rsp)
+  400bd0:       f3 0f 10 44 24 dc       movss  -0x24(%rsp),%xmm0
+  400bd6:       f3 0f 10 54 24 d8       movss  -0x28(%rsp),%xmm2
+  400bdc:       f3 0f 58 44 24 cc       addss  -0x34(%rsp),%xmm0
+  400be2:       f3 0f 58 54 24 c8       addss  -0x38(%rsp),%xmm2
+  400be8:       f3 0f 11 44 24 ec       movss  %xmm0,-0x14(%rsp)
+  400bee:       f3 0f 11 54 24 e8       movss  %xmm2,-0x18(%rsp)
+  400bf4:       f3 0f 7e 44 24 e8       movq   -0x18(%rsp),%xmm0
+  400bfa:       c3                      retq
+
+  With arguments passed as const&:
+
+  0000000000400bc0 <vfadd>:
+  400bc0:       f3 0f 10 47 04          movss  0x4(%rdi),%xmm0
+  400bc5:       f3 0f 10 17             movss  (%rdi),%xmm2
+  400bc9:       f3 0f 58 46 04          addss  0x4(%rsi),%xmm0
+  400bce:       f3 0f 58 16             addss  (%rsi),%xmm2
+  400bd2:       f3 0f 10 4f 08          movss  0x8(%rdi),%xmm1
+  400bd7:       f3 0f 58 4e 08          addss  0x8(%rsi),%xmm1
+  400bdc:       f3 0f 11 44 24 ec       movss  %xmm0,-0x14(%rsp)
+  400be2:       f3 0f 11 54 24 e8       movss  %xmm2,-0x18(%rsp)
+  400be8:       f3 0f 7e 44 24 e8       movq   -0x18(%rsp),%xmm0
+  400bee:       c3                      retq   
+
+ */
+
+extern "C" VectorF vfadd(const VectorF& a, const VectorF& b)
+{
+   return a + b;
+}
+
+int main(int argc, char **argv)
+{
+   VectorF a = make_vector(2.0f, 3.0f, 4.0f);
+   VectorF b = make_vector(5.0f, 6.0f, 7.0f);
+
+   // Component-wise sum via vfadd; expected output is "[7 9 11]"
+   VectorF c = vfadd(a, b);
+   std::cout << c << std::endl;
+
+   return 0;
+}
-- 
2.39.2