From 292dba06f5e7b42af43a2186a462df72d4cf2808 Mon Sep 17 00:00:00 2001 From: Nick Gasson Date: Sat, 15 Jan 2011 09:54:29 +0000 Subject: [PATCH] Start trying to optimise vector code --- CMakeLists.txt | 9 ++++-- include/Maths.hpp | 73 ++++++++++++++++++++++++++++----------------- src/Mesh.cpp | 6 +--- tools/MathsTest.cpp | 53 ++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 34 deletions(-) create mode 100644 tools/MathsTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 13b437c..e147078 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,8 +44,10 @@ include_directories (include ${CMAKE_CURRENT_BINARY_DIR}) if (NOT WIN32) # Unix set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") # "-Wconversion -Werror" -endif (NOT WIN32) - + if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -mfpmath=sse") + endif () +endif () # WIN32 makes a non-console application on Windows add_executable (${PROJECT_NAME} WIN32 ${folder_source}) @@ -54,6 +56,9 @@ target_link_libraries (${PROJECT_NAME} ${SDL_LIBRARY} ${SDLIMAGE_LIBRARY} ${OPENGL_LIBRARY} ${XERCES_LIBRARIES} ${Boost_LIBRARIES} ${FREETYPE_LIBRARIES} ${GLEW_LIBRARY}) +# Test tool +add_executable (MathsTest EXCLUDE_FROM_ALL tools/MathsTest.cpp) + # Profiling if (PROFILE) set_target_properties (${PROJECT_NAME} PROPERTIES LINK_FLAGS -pg) diff --git a/include/Maths.hpp b/include/Maths.hpp index c185132..0d425ba 100644 --- a/include/Maths.hpp +++ b/include/Maths.hpp @@ -1,5 +1,5 @@ // -// Copyright (C) 2009-2010 Nick Gasson +// Copyright (C) 2009-2011 Nick Gasson // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -24,14 +24,31 @@ #include #include +#if 0 +template +union Packed; + +template <> +union Packed { + int __attribute__((mode(V4SF))) packed; + float unpacked[4]; +}; + +template <> +union Packed { + int __attribute__((mode(V3SF))) packed; + float unpacked[3]; +}; +#endif + // A generic 3D vector template struct Vector { - Vector(T x, T y, T z) : x(x), y(y), z(z) {} - Vector() : x(0), y(0), z(0) {} + inline Vector(T x, T y, T z) : x(x), y(y), z(z) {} + inline Vector() : x(0), y(0), z(0) {} // Cross product - Vector operator*(const Vector& v) const + inline Vector operator*(const Vector& v) const { return Vector( y*v.z - z*v.y, @@ -40,30 +57,30 @@ struct Vector { } // Multiply by a scalar - Vector operator*(T t) const + inline Vector operator*(T t) const { return Vector(x*t, y*t, z*t); } // Divide by a scalar - Vector operator/(T t) const + inline Vector operator/(T t) const { return Vector(x/t, y/t, z/t); } // Scalar product - T dot(const Vector&v) const + inline T dot(const Vector&v) const { return x*v.x + y*v.y + z*v.z; } // Magnitude - T length() const + inline T length() const { - return static_cast(sqrt(static_cast(x*x + y*y + z*z))); + return sqrt(x*x + y*y + z*z); } - Vector& normalise() + inline Vector& normalise() { T m = length(); x /= m; @@ -72,12 +89,12 @@ struct Vector { return *this; } - Vector operator+(const Vector& v) const + inline Vector operator+(const Vector& v) const { return Vector(x+v.x, y+v.y, z+v.z); } - Vector operator+=(const Vector& v) + inline Vector& operator+=(const Vector& v) { x += v.x; y += v.y; @@ -85,17 +102,17 @@ struct Vector { return *this; } - Vector operator-(const Vector& v) const + inline Vector operator-(const Vector& v) const { return Vector(x-v.x, y-v.y, z-v.z); } - Vector operator-() const + inline Vector operator-() const { return Vector(-x, -y, -z); } - Vector operator-=(const Vector& v) + inline Vector& operator-=(const Vector& v) { x -= v.x; y -= v.y; @@ -103,17 +120,17 @@ struct Vector { return *this; } - bool operator==(const Vector& v) const + inline bool operator==(const Vector& v) const { return x == v.x && y == v.y && z == v.z; } - bool operator!=(const Vector& v) const + inline bool operator!=(const Vector& v) const { return !(v == *this); } - bool operator<(const Vector& rhs) const + inline bool operator<(const Vector& rhs) const { return x < rhs.x || (x == rhs.x @@ -132,25 +149,27 @@ struct Vector { }; template -std::ostream& operator<<(std::ostream& a_stream, const Vector& a_vector) +std::ostream& operator<<(std::ostream& s, const Vector& v) { - return a_stream << "[" << a_vector.x << " " << a_vector.y - << " " << a_vector.z << "]"; + return s << "[" << v.x << " " << v.y + << " " << v.z << "]"; } template -Vector make_vector(T x, T y, T z) +inline Vector make_vector(T x, T y, T z) { return Vector(x, y, z); } +typedef Vector VectorF; + // Find a surface normal template Vector surface_normal(const Vector& a, const Vector& b, const Vector& c) { - Vector v1 = b - a; - Vector v2 = c - a; + const Vector v1 = b - a; + const Vector v2 = c - a; Vector n = v1 * v2; n.normalise(); return n; @@ -158,7 +177,7 @@ Vector surface_normal(const Vector& a, const Vector& b, // Useful debugging function void draw_normal(const Vector& a_position, - const Vector& a_normal); + const Vector& a_normal); // A 2D point in space template @@ -258,13 +277,13 @@ float approx_gradient(function a_func, float x); template inline float deg_to_rad(T t) { - return static_cast(t) * M_PI / 180.0; + return float(t) * M_PI / 180.0f; } template inline T rad_to_deg(float r) { - return static_cast(r * 180.0 / M_PI); + return T(r * 180.0f / M_PI); } #endif diff --git a/src/Mesh.cpp b/src/Mesh.cpp index 0654f43..da4d854 100644 --- a/src/Mesh.cpp +++ b/src/Mesh.cpp @@ -67,11 +67,7 @@ struct MeshBuffer : IMeshBuffer { static bool merge_vector(const Vector& v1, const Vector& v2) { - const float tolerance = 0.001f; - - return abs(v1.x - v2.x) < tolerance - && abs(v1.y - v2.y) < tolerance - && abs(v1.z - v2.z) < tolerance; + return v1.approx_equal(v2, 0.001f); } vector vertices; diff --git a/tools/MathsTest.cpp b/tools/MathsTest.cpp new file mode 100644 index 0000000..aea11a6 --- /dev/null +++ b/tools/MathsTest.cpp @@ -0,0 +1,53 @@ +#include "Maths.hpp" + +#include + +/* + + Baseline: + + 0000000000400bc0 : + 400bc0: 66 0f d6 44 24 d8 movq %xmm0,-0x28(%rsp) + 400bc6: f3 0f 58 cb addss %xmm3,%xmm1 + 400bca: 66 0f d6 54 24 c8 movq %xmm2,-0x38(%rsp) + 400bd0: f3 0f 10 44 24 dc movss -0x24(%rsp),%xmm0 + 400bd6: f3 0f 10 54 24 d8 movss -0x28(%rsp),%xmm2 + 400bdc: f3 0f 58 44 24 cc addss -0x34(%rsp),%xmm0 + 400be2: f3 0f 58 54 24 c8 addss -0x38(%rsp),%xmm2 + 400be8: f3 0f 11 44 24 ec movss %xmm0,-0x14(%rsp) + 400bee: f3 0f 11 54 24 e8 movss %xmm2,-0x18(%rsp) + 400bf4: f3 0f 7e 44 24 e8 movq -0x18(%rsp),%xmm0 + 400bfa: c3 retq + + Make arguments const&: + + 0000000000400bc0 : + 400bc0: f3 0f 10 47 04 movss 0x4(%rdi),%xmm0 + 400bc5: f3 0f 10 17 movss (%rdi),%xmm2 + 400bc9: f3 0f 58 46 04 addss 0x4(%rsi),%xmm0 + 400bce: f3 0f 58 16 addss (%rsi),%xmm2 + 400bd2: f3 0f 10 4f 08 movss 0x8(%rdi),%xmm1 + 400bd7: f3 0f 58 4e 08 addss 0x8(%rsi),%xmm1 + 400bdc: f3 0f 11 44 24 ec movss %xmm0,-0x14(%rsp) + 400be2: f3 0f 11 54 24 e8 movss %xmm2,-0x18(%rsp) + 400be8: f3 0f 7e 44 24 e8 movq -0x18(%rsp),%xmm0 + 400bee: c3 retq + + */ + +extern "C" VectorF vfadd(const VectorF& a, const VectorF& b) +{ + return a + b; +} + +int main(int argc, char **argv) +{ + VectorF a = make_vector(2.0f, 3.0f, 4.0f); + VectorF b = make_vector(5.0f, 6.0f, 7.0f); + + VectorF c = vfadd(a, b); + + cout << c << endl; + + return 0; +} -- 2.39.2