Skip to content
Open

Gemm #50

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/bench1/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@

exe bench1
: bench1.cpp bench11.cpp bench12.cpp bench13.cpp
;
: : <optimization>speed ;
14 changes: 10 additions & 4 deletions benchmarks/bench1/bench1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,28 @@ void do_bench (std::string type_string, int scale)
header (type_string + ", 3");
bench_1<scalar, 3> () (1000000 * scale);
bench_2<scalar, 3> () (300000 * scale);
bench_3<scalar, 3> () (100000 * scale);
bench_3<scalar, 3> () (3000000 * scale);

header (type_string + ", 10");
bench_1<scalar, 10> () (300000 * scale);
bench_2<scalar, 10> () (30000 * scale);
bench_3<scalar, 10> () (3000 * scale);
bench_3<scalar, 10> () (100000 * scale);

header (type_string + ", 30");
bench_1<scalar, 30> () (100000 * scale);
bench_2<scalar, 30> () (3000 * scale);
bench_3<scalar, 30> () (100 * scale);
bench_3<scalar, 30> () (30000 * scale);

header (type_string + ", 100");
bench_1<scalar, 100> () (30000 * scale);
bench_2<scalar, 100> () (300 * scale);
bench_3<scalar, 100> () (3 * scale);
bench_3<scalar, 100> () (1000 * scale);

header (type_string + ", 300");
bench_3<scalar, 300> () (30 * scale);

header (type_string + ", 1000");
bench_3<scalar, 1000> () (1 * scale);
}

int main (int argc, char *argv []) {
Expand Down
8 changes: 8 additions & 0 deletions benchmarks/bench1/bench13.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,13 +166,17 @@ template struct bench_3<float, 3>;
template struct bench_3<float, 10>;
template struct bench_3<float, 30>;
template struct bench_3<float, 100>;
template struct bench_3<float, 300>;
template struct bench_3<float, 1000>;
#endif

#ifdef USE_DOUBLE
template struct bench_3<double, 3>;
template struct bench_3<double, 10>;
template struct bench_3<double, 30>;
template struct bench_3<double, 100>;
template struct bench_3<double, 300>;
template struct bench_3<double, 1000>;
#endif

#ifdef USE_STD_COMPLEX
Expand All @@ -181,12 +185,16 @@ template struct bench_3<std::complex<float>, 3>;
template struct bench_3<std::complex<float>, 10>;
template struct bench_3<std::complex<float>, 30>;
template struct bench_3<std::complex<float>, 100>;
template struct bench_3<std::complex<float>, 300>;
template struct bench_3<std::complex<float>, 1000>;
#endif

#ifdef USE_DOUBLE
template struct bench_3<std::complex<double>, 3>;
template struct bench_3<std::complex<double>, 10>;
template struct bench_3<std::complex<double>, 30>;
template struct bench_3<std::complex<double>, 100>;
template struct bench_3<std::complex<double>, 300>;
template struct bench_3<std::complex<double>, 1000>;
#endif
#endif
3 changes: 2 additions & 1 deletion benchmarks/bench3/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@

exe bench3
: bench3.cpp bench31.cpp bench32.cpp bench33.cpp
;
: : <optimization>speed ;

14 changes: 10 additions & 4 deletions benchmarks/bench3/bench3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,28 @@ void do_bench (std::string type_string, int scale)
header (type_string + ", 3");
bench_1<scalar, 3> () (1000000 * scale);
bench_2<scalar, 3> () (300000 * scale);
bench_3<scalar, 3> () (100000 * scale);
bench_3<scalar, 3> () (3000000 * scale);

header (type_string + ", 10");
bench_1<scalar, 10> () (300000 * scale);
bench_2<scalar, 10> () (30000 * scale);
bench_3<scalar, 10> () (3000 * scale);
bench_3<scalar, 10> () (100000 * scale);

header (type_string + ", 30");
bench_1<scalar, 30> () (100000 * scale);
bench_2<scalar, 30> () (3000 * scale);
bench_3<scalar, 30> () (100 * scale);
bench_3<scalar, 30> () (30000 * scale);

header (type_string + ", 100");
bench_1<scalar, 100> () (30000 * scale);
bench_2<scalar, 100> () (300 * scale);
bench_3<scalar, 100> () (3 * scale);
bench_3<scalar, 100> () (1000 * scale);

header (type_string + ", 300");
bench_3<scalar, 300> () (30 * scale);

header (type_string + ", 1000");
bench_3<scalar, 1000> () (1 * scale);
}

int main (int argc, char *argv []) {
Expand Down
8 changes: 8 additions & 0 deletions benchmarks/bench3/bench33.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,17 @@ template struct bench_3<float, 3>;
template struct bench_3<float, 10>;
template struct bench_3<float, 30>;
template struct bench_3<float, 100>;
template struct bench_3<float, 300>;
template struct bench_3<float, 1000>;
#endif

#ifdef USE_DOUBLE
template struct bench_3<double, 3>;
template struct bench_3<double, 10>;
template struct bench_3<double, 30>;
template struct bench_3<double, 100>;
template struct bench_3<double, 300>;
template struct bench_3<double, 1000>;
#endif

#ifdef USE_STD_COMPLEX
Expand All @@ -187,12 +191,16 @@ template struct bench_3<std::complex<float>, 3>;
template struct bench_3<std::complex<float>, 10>;
template struct bench_3<std::complex<float>, 30>;
template struct bench_3<std::complex<float>, 100>;
template struct bench_3<std::complex<float>, 300>;
template struct bench_3<std::complex<float>, 1000>;
#endif

#ifdef USE_DOUBLE
template struct bench_3<std::complex<double>, 3>;
template struct bench_3<std::complex<double>, 10>;
template struct bench_3<std::complex<double>, 30>;
template struct bench_3<std::complex<double>, 100>;
template struct bench_3<std::complex<double>, 300>;
template struct bench_3<std::complex<double>, 1000>;
#endif
#endif
3 changes: 2 additions & 1 deletion doc/operations_overview.html
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ <h3>norms</h3>

<pre><code>
t = norm_inf(v); i = index_norm_inf(v);
t = norm_1(v); t = norm_2(v);
t = norm_1(v); t = norm_2(v);
t = norm_2_square(v);
t = norm_inf(A); i = index_norm_inf(A);
t = norm_1(A); t = norm_frobenius(A);
</code></pre>
Expand Down
5 changes: 5 additions & 0 deletions doc/vector_expression.html
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,11 @@ <h4>Prototypes</h4>
typename vector_scalar_unary_traits&lt;E, vector_norm_2&lt;typename E::value_type&gt; &gt;::result_type
norm_2 (const vector_expression&lt;E&gt; &amp;e);

// norm_2_square v = sum (v [i] * v [i])
template&lt;class E&gt;
typename vector_scalar_unary_traits&lt;E, vector_norm_2_square&lt;typename E::value_type&gt; &gt;::result_type
norm_2_square (const vector_expression&lt;E&gt; &amp;e);

// norm_inf v = max (abs (v [i]))
template&lt;class E&gt;
typename vector_scalar_unary_traits&lt;E, vector_norm_inf&lt;typename E::value_type&gt; &gt;::result_type
Expand Down
121 changes: 121 additions & 0 deletions include/boost/numeric/ublas/detail/block_sizes.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//
// Copyright (c) 2016
// Michael Lehn, Imre Palik
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#ifndef _BOOST_UBLAS_BLOCK_SIZES_
#define _BOOST_UBLAS_BLOCK_SIZES_

#include <boost/numeric/ublas/detail/vector.hpp>

namespace boost { namespace numeric { namespace ublas { namespace detail {

template <typename T>
struct prod_block_size {
static const unsigned vector_length = _BOOST_UBLAS_VECTOR_SIZE/sizeof(T); // Number of elements in a vector register
static const unsigned mc = 256;
static const unsigned kc = 512; // stripe length
static const unsigned nc = (4096/(3 * vector_length)) * (3 * vector_length);
static const unsigned mr = 4; // stripe width for lhs
static const unsigned nr = 3 * vector_length; // stripe width for rhs
static const unsigned align = 64; // align temporary arrays to this boundary
static const unsigned limit = 14; // Use gemm from this size
BOOST_STATIC_ASSERT_MSG(mc>0 && kc>0 && nc>0 && mr>0 && nr>0, "Invalid block size.");
BOOST_STATIC_ASSERT_MSG(mc % mr == 0, "MC must be a multiple of MR.");
BOOST_STATIC_ASSERT_MSG(nc % nr == 0, "NC must be a multiple of NR.");
};

// Block-size constants used when the element type is float.
template <>
struct prod_block_size<float> {
    // Number of elements in a vector register
    static const unsigned vector_length = _BOOST_UBLAS_VECTOR_SIZE/sizeof(float);

    // Register-level stripes.
    static const unsigned mr = 4;       // stripe width for lhs
    static const unsigned nr = 16;      // stripe width for rhs

    // Outer block extents.
    static const unsigned kc = 512;     // stripe length
    static const unsigned mc = 256;
    static const unsigned nc = 4096;

    static const unsigned limit = 14;   // Use gemm from this size
    static const unsigned align = 64;   // align temporary arrays to this boundary

    // Consistency checks on the constants above.
    BOOST_STATIC_ASSERT_MSG(mc>0 && kc>0 && nc>0 && mr>0 && nr>0, "Invalid block size.");
    BOOST_STATIC_ASSERT_MSG(mc % mr == 0, "MC must be a multiple of MR.");
    BOOST_STATIC_ASSERT_MSG(nc % nr == 0, "NC must be a multiple of NR.");
};

// Block-size constants used when the element type is long double.
// vector_length is fixed at 1 here rather than derived from
// _BOOST_UBLAS_VECTOR_SIZE.
template <>
struct prod_block_size<long double> {
    // Number of elements in a vector register
    static const unsigned vector_length = 1;

    // Register-level stripes.
    static const unsigned mr = 1;       // stripe width for lhs
    static const unsigned nr = 4;       // stripe width for rhs

    // Outer block extents.
    static const unsigned kc = 512;     // stripe length
    static const unsigned mc = 256;
    static const unsigned nc = 4096;

    static const unsigned limit = 42;   // Use gemm from this size
    static const unsigned align = 64;   // align temporary arrays to this boundary

    // Consistency checks on the constants above.
    BOOST_STATIC_ASSERT_MSG(mc>0 && kc>0 && nc>0 && mr>0 && nr>0, "Invalid block size.");
    BOOST_STATIC_ASSERT_MSG(mc % mr == 0, "MC must be a multiple of MR.");
    BOOST_STATIC_ASSERT_MSG(nc % nr == 0, "NC must be a multiple of NR.");
};

// Block-size constants used when the element type is std::complex<T>.
//
// The long double case has its own full specialization; this partial
// specialization covers every other T.
template <typename T>
struct prod_block_size<std::complex<T> > {
    // Number of elements in a vector register.
    // NOTE(review): the quotient uses sizeof(T), not sizeof(std::complex<T>);
    // confirm that counting in units of the real type is intended.
    // Clamped to at least 1: if T is larger than _BOOST_UBLAS_VECTOR_SIZE the
    // plain quotient is 0, nr would become 0, and "nc % nr" below would be a
    // division by zero inside a constant expression (a hard compile error).
    static const unsigned vector_length =
        (_BOOST_UBLAS_VECTOR_SIZE/sizeof(T) > 0) ? (_BOOST_UBLAS_VECTOR_SIZE/sizeof(T)) : 1;
    static const unsigned mc = 255; // multiple of mr (3)
    static const unsigned kc = 512; // stripe length
    static const unsigned nc = 4096;
    static const unsigned mr = 3; // stripe width for lhs
    static const unsigned nr = vector_length; // stripe width for rhs
    static const unsigned align = 64; // align temporary arrays to this boundary
    static const unsigned limit = 23; // Use gemm from this size
    BOOST_STATIC_ASSERT_MSG(mc>0 && kc>0 && nc>0 && mr>0 && nr>0, "Invalid block size.");
    BOOST_STATIC_ASSERT_MSG(mc % mr == 0, "MC must be a multiple of MR.");
    BOOST_STATIC_ASSERT_MSG(nc % nr == 0, "NC must be a multiple of NR.");
};

// Block-size constants used when the element type is std::complex<long double>.
// Both stripe widths and vector_length are fixed at 1.
template <>
struct prod_block_size<std::complex<long double> > {
    // Number of elements in a vector register
    static const unsigned vector_length = 1;

    // Register-level stripes.
    static const unsigned mr = 1;       // stripe width for lhs
    static const unsigned nr = 1;       // stripe width for rhs

    // Outer block extents.
    static const unsigned kc = 512;     // stripe length
    static const unsigned mc = 256;
    static const unsigned nc = 4096;

    static const unsigned limit = 68;   // Use gemm from this size
    static const unsigned align = 64;   // align temporary arrays to this boundary

    // Consistency checks on the constants above.
    BOOST_STATIC_ASSERT_MSG(mc>0 && kc>0 && nc>0 && mr>0 && nr>0, "Invalid block size.");
    BOOST_STATIC_ASSERT_MSG(mc % mr == 0, "MC must be a multiple of MR.");
    BOOST_STATIC_ASSERT_MSG(nc % nr == 0, "NC must be a multiple of NR.");
};

// Compile-time trait: is_blocksize<T>::value is true when T declares a member
// named nr, and false otherwise.
//
// T must be a type that can be used as a base class, because the detection
// works by deriving from it.
template<typename T>
struct is_blocksize {
// Supplies a member named nr that is always present.
struct fallback { static const int nr = 0; };
// Inherits from both T and fallback. If T also declares nr, the name
// derived::nr is ambiguous; otherwise it refers to fallback::nr.
struct derived : T, fallback {};
// Helper whose only role is to make "nonr<C::nr>::type" a valid type exactly
// when C::nr is an unambiguous integral constant. The value member is not
// used within this struct.
// NOTE(review): false_type is not declared in this file; presumably
// boost::false_type found through the enclosing boost namespace - confirm the
// required header is included before this one.
template<int C1>
struct nonr {
static const bool value = false;
typedef false_type type;
};

// Overload taken when C::nr is unambiguous (T has no nr of its own): it is the
// better match for the argument 0 and returns an array reference of size 1.
template<typename C> static char (&f(typename nonr<C::nr>::type*))[1];
// Catch-all overload; it is the only candidate left when the overload above is
// removed by substitution failure (C::nr ambiguous). Returns size 2.
template<typename C> static char (&f(...))[2];

// Size 2 means the catch-all overload was chosen, i.e. T declares nr.
static bool const value = sizeof(f<derived>(0)) == 2;
};

// Compile-time validation of a block-size policy T.
//
// Instantiating check_blocksize<T> triggers the static assertions below, which
// read the constants mc, kc, nc, mr, nr, vector_length and limit from T.
template<typename T>
struct check_blocksize {
    // Every block extent and stripe width must be strictly positive.
    BOOST_STATIC_ASSERT_MSG(
        T::mc>0 && T::kc>0 && T::nc>0 && T::mr>0 && T::nr>0,
        "Invalid block size.");

    // Block extents must be whole multiples of the stripe widths.
    BOOST_STATIC_ASSERT_MSG(
        T::mc % T::mr == 0,
        "MC must be a multiple of MR.");
    BOOST_STATIC_ASSERT_MSG(
        T::nc % T::nr == 0,
        "NC must be a multiple of NR.");

    // The divisibility requirement on nr only applies when vector_length
    // is greater than 1.
    BOOST_STATIC_ASSERT_MSG(
        T::vector_length <= 1 || T::nr % T::vector_length == 0,
        "NR must be a multiple of vector size");

    // limit must be at least 2.
    BOOST_STATIC_ASSERT_MSG(
        T::limit >= 2,
        "Minimum matrix size for gemm is 2*2");
};
}}}}
#endif
Loading