/* -*- C++ -*- ------------------------------------------------------------

Copyright (c) 2007 Jesse Anders and Demian Nave http://cmldev.net/

The Configurable Math Library (CML) is distributed under the terms of the
Boost Software License, v1.0 (see cml/LICENSE for details).

 *-----------------------------------------------------------------------*/
/** @file
 *  @brief Defines vector unrollers.
 *
 * @todo Add unrolling for dynamic vectors, and for vectors longer than
 * CML_VECTOR_UNROLL_LIMIT.
 *
 * @todo Does it make sense to unroll an assignment if either side of the
 * assignment has a fixed size, or just when the target vector is fixed
 * size?
 */
22 #ifndef vector_unroller_h
23 #define vector_unroller_h
#include <algorithm>
#include <cstddef>

#include <cml/et/traits.h>
#include <cml/et/size_checking.h>
#include <cml/et/scalar_ops.h>

#if !defined(CML_VECTOR_UNROLL_LIMIT)
#error "CML_VECTOR_UNROLL_LIMIT is undefined."
#endif
37 /** Unroll a binary assignment operator on a fixed-size vector.
39 * This uses forward iteration to make efficient use of the cache.
42 * @sa cml::et::OpAssign
44 * @bug Need to verify that OpT is actually an assignment operator.
46 template<class OpT
, typename E
, class AT
, class SrcT
>
47 class VectorAssignmentUnroller
51 /* Forward declare: */
52 template<int N
, int Last
, bool can_unroll
> struct Eval
;
54 /* The vector type being assigned to: */
55 typedef cml::vector
<E
,AT
> vector_type
;
57 /* Record traits for the arguments: */
58 typedef ExprTraits
<vector_type
> dest_traits
;
59 typedef ExprTraits
<SrcT
> src_traits
;
61 /** Evaluate the binary operator for the first Len-1 elements. */
62 template<int N
, int Last
> struct Eval
<N
,Last
,true> {
63 void operator()(vector_type
& dest
, const SrcT
& src
) const {
65 /* Apply to current N: */
66 OpT().apply(dest
[N
], src_traits().get(src
,N
));
67 /* Note: we don't need get(), since dest is a vector. */
70 Eval
<N
+1,Last
,true>()(dest
, src
);
74 /** Evaluate the binary operator at element Last. */
75 template<int Last
> struct Eval
<Last
,Last
,true> {
76 void operator()(vector_type
& dest
, const SrcT
& src
) const {
78 /* Apply to last element: */
79 OpT().apply(dest
[Last
], src_traits().get(src
,Last
));
80 /* Note: we don't need get(), since dest is a vector. */
85 /** Evaluate the binary operator using a loop.
87 * This is used when the vector's length is longer than
88 * CML_VECTOR_UNROLL_LIMIT
90 template<int N
, int Last
> struct Eval
<N
,Last
,false> {
91 void operator()(vector_type
& dest
, const SrcT
& src
) const {
92 for(size_t i
= 0; i
<= Last
; ++i
) {
93 OpT().apply(dest
[i
], src_traits().get(src
,i
));
94 /* Note: we don't need get(), since dest is a vector. */
102 /** Unroll assignment to a fixed-sized vector. */
103 void operator()(vector_type
& dest
, const SrcT
& src
, cml::fixed_size_tag
)
105 typedef cml::vector
<E
,AT
> vector_type
;
106 enum { Len
= vector_type::array_size
};
107 typedef typename VectorAssignmentUnroller
<OpT
,E
,AT
,SrcT
>::template
108 Eval
<0, Len
-1, (Len
<= CML_VECTOR_UNROLL_LIMIT
)> Unroller
;
109 /* Note: Len is the array size, so Len-1 is the last element. */
111 /* Use a run-time check if src is a run-time sized expression: */
112 typedef typename ExprTraits
<SrcT
>::size_tag src_size
;
113 typedef typename select_if
<
114 same_type
<src_size
,dynamic_size_tag
>::is_true
,
115 dynamic_size_tag
, fixed_size_tag
>::result size_tag
;
117 /* Check the expression size (the returned size isn't needed): */
118 CheckedSize(dest
,src
,size_tag());
119 /* Note: for two fixed-size expressions, the if-statements and
120 * comparisons should be completely eliminated as dead code. If src
121 * is a dynamic-sized expression, the check will still happen.
124 /* Now, call the unroller: */
125 Unroller()(dest
,src
);
130 /* XXX Blah, a temp. hack to fix the auto-resizing stuff below. */
131 size_t CheckOrResize(
132 vector_type
& dest
, const SrcT
& src
, cml::resizable_tag
)
134 #if defined(CML_AUTOMATIC_VECTOR_RESIZE_ON_ASSIGNMENT)
135 /* Get the size of src. This also causes src to check its size: */
136 size_t N
= std::max(dest
.size(),src_traits().size(src
));
138 /* Set the destination vector's size: */
139 cml::et::detail::Resize(dest
,N
);
141 size_t N
= CheckedSize(dest
,src
,dynamic_size_tag());
147 size_t CheckOrResize(
148 vector_type
& dest
, const SrcT
& src
, cml::not_resizable_tag
)
150 return CheckedSize(dest
,src
,dynamic_size_tag());
152 /* XXX Blah, a temp. hack to fix the auto-resizing stuff below. */
156 /** Just use a loop to assign to a runtime-sized vector. */
157 void operator()(vector_type
& dest
, const SrcT
& src
, cml::dynamic_size_tag
)
160 typedef ExprTraits
<SrcT
> src_traits
;
161 size_t N
= this->CheckOrResize(
162 dest
,src
,typename
vector_type::resizing_tag());
163 for(size_t i
= 0; i
< N
; ++i
) {
164 OpT().apply(dest
[i
], src_traits().get(src
,i
));
165 /* Note: we don't need get(), since dest is a vector. */
171 /** Unroll a vector accumulation/reduction operator.
173 * This uses forward iteration to make efficient use of the cache.
175 template<class AccumT
, class OpT
, class LeftT
, class RightT
>
176 struct VectorAccumulateUnroller
178 /* Forward declare: */
179 template<int N
, int Last
, bool can_unroll
> struct Eval
;
181 /* Record traits for the arguments: */
182 typedef ExprTraits
<LeftT
> left_traits
;
183 typedef ExprTraits
<RightT
> right_traits
;
185 /* Figure out the return type: */
186 typedef typename
AccumT::value_type result_type
;
188 /** Evaluate for the first Len-1 elements. */
189 template<int N
, int Last
> struct Eval
<N
,Last
,true> {
190 result_type
operator()(
191 const LeftT
& left
, const RightT
& right
) const
193 /* Apply to last value: */
194 return AccumT().apply(
195 OpT().apply(left
[N
], right_traits().get(right
,N
)),
196 Eval
<N
+1,Last
,true>()(left
, right
));
197 /* Note: we don't need get(), since dest is a vector. */
201 /** Evaluate the binary operator at element Last. */
202 template<int Last
> struct Eval
<Last
,Last
,true> {
203 result_type
operator()(
204 const LeftT
& left
, const RightT
& right
) const
206 return OpT().apply(left
[Last
],right_traits().get(right
,Last
));
207 /* Note: we don't need get(), since dest is a vector. */
211 /** Evaluate using a loop. */
212 template<int N
, int Last
> struct Eval
<N
,Last
,false> {
213 result_type
operator()(
214 const LeftT
& left
, const RightT
& right
) const
216 result_type accum
= OpT().apply(left
[0],right
[0]);
217 for(size_t i
= 1; i
<= Last
; ++i
) {
218 /* XXX This might not be optimized properly by some compilers,
219 * but to do anything else requires changing the requirements
220 * of a scalar operator.
222 accum
= AccumT().apply(accum
, OpT().apply(
223 left
[i
],right_traits().get(right
,i
)));
224 /* Note: we don't need get(), since dest is a vector. */
232 /** Construct an assignment unroller.
234 * The operator must be an assignment op, otherwise, this doesn't make any
237 * @bug Need to verify that OpT is actually an assignment operator.
239 template<class OpT
, class SrcT
, typename E
, class AT
> inline
240 void UnrollAssignment(cml::vector
<E
,AT
>& dest
, const SrcT
& src
)
242 /* Record the destination vector type, and the expression traits: */
243 typedef cml::vector
<E
,AT
> vector_type
;
245 /* Record the type of the unroller: */
246 typedef detail::VectorAssignmentUnroller
<OpT
,E
,AT
,SrcT
> unroller
;
248 /* Do the unroll call: */
249 unroller()(dest
, src
, typename
vector_type::size_tag());
250 /* XXX It may make sense to unroll if either side is a fixed size. */
// -------------------------------------------------------------------------