diff --git a/colormaps.cpp b/colormaps.cpp new file mode 100644 index 0000000..851a5c7 --- /dev/null +++ b/colormaps.cpp @@ -0,0 +1,430 @@ +/********** + * Copyright 2014 Samuel Bear Powell + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +\**********/ +#include "Colormaps.hpp" +#include +#include +#include + +#ifdef MSVC_ISNAN +int isnan(double x) { + return _isnan(x); +} +int isnan(float x) { + return _isnanf(x); +} +#endif + +#undef min +#undef max + +template +static vector vec(T(&arr)[N]) { + return vector(begin(arr), end(arr)); +} + +uint32_t Colormap::make_format(int red_byte, int green_byte, int blue_byte, int alpha_byte, uint8_t alpha_value) { + return (red_byte & 3) | ((green_byte & 3) << 2) | ((blue_byte & 3) << 4) | ((alpha_byte & 3) << 6) | (uint32_t(alpha_value) << 8); +} + +uint32_t Colormap::format_color(uint32_t format, uint8_t r, uint8_t g, uint8_t b) { + uint32_t a = ((format >> 8) & 0xff); + return format_color(format,r,g,b,a); +} + +uint32_t Colormap::format_color(uint32_t format, uint8_t r, uint8_t g, uint8_t b, uint8_t a) { + uint32_t rshift = (format & 3) * 8; + uint32_t gshift = ((format >> 2) & 3) * 8; + uint32_t bshift = ((format >> 4) & 3) * 8; + uint32_t ashift = ((format >> 6) & 3) * 8; + return (a << ashift) | (uint32_t(r) << rshift) | (uint32_t(g) << gshift) | (uint32_t(b) << bshift); +} + +void Colormap::get_rgb(uint32_t color, uint32_t format, uint8_t& r, uint8_t& g, uint8_t& b) { + uint8_t a; + get_rgba(color,format,r,g,b,a); +} + +void Colormap::get_rgba(uint32_t color, uint32_t format, uint8_t& r, uint8_t& g, uint8_t& b, uint8_t& a) { + uint32_t rshift = (format & 3) * 8; + uint32_t gshift = ((format >> 2) & 3) * 8; + uint32_t bshift = ((format >> 4) & 3) * 8; + uint32_t ashift = ((format >> 6) & 3) * 8; + r = (color >> rshift)&0xff; + g = (color >> gshift)&0xff; + b = (color >> bshift)&0xff; + a = (color >> ashift)&0xff; +} + +uint32_t Colormap::reformat_color(uint32_t color, uint32_t old_format, uint32_t new_format) { + uint8_t r,g,b,a; + get_rgba(color, old_format, r, g, b, a); + return format_color(new_format, r, g, b, a); +} + +Colormap::Colormap() : map(1, 0), format(0), min_color(0), max_color(0), nan_color(0), min(flimits::quiet_NaN()), max(flimits::quiet_NaN()) {} + +Colormap::Colormap(const Colormap& cm) : map(cm.map), format(cm.format), max_color(cm.max_color), min_color(cm.min_color), nan_color(cm.nan_color), min(cm.min), max(cm.max) {} + +uint32_t Colormap::_apply(float val, float min, float max, float scale) const { + if (isnan(val)) + return nan_color; + else if (val < min) + return min_color; + else if (val >= max) + return max_color; + else { + int idx((val - min)*scale); + return map[idx]; + } +} + +Colormap& Colormap::operator=(const Colormap& cm) { + Colormap tmp(cm); + swap(tmp); + return *this; +} + +void Colormap::swap(Colormap& cm) { + std::swap(map, cm.map); + std::swap(format, cm.format); + std::swap(min_color, cm.min_color); + std::swap(max_color, cm.max_color); + std::swap(nan_color, cm.nan_color); + std::swap(min, cm.min); + std::swap(max, cm.max); +} + +Colormap::Colormap(Colormap&& cm) : map(std::move(cm.map)), format(cm.format), max_color(cm.max_color), min_color(cm.min_color), nan_color(cm.nan_color), min(cm.min), max(cm.max) {} + +Colormap& Colormap::operator=(Colormap&& cm) { + Colormap tmp(std::move(cm)); + swap(tmp); + return *this; +} + +Colormap::Colormap(const vector& map, uint32_t format, uint32_t min_color, uint32_t max_color, uint32_t nan_color, float min, float max) : +map(map), format(format), min_color(min_color), max_color(max_color), nan_color(nan_color), min(min), max(max) {} + +Colormap::Colormap(const size_t n, const vector& vals, const vector& gray, uint32_t format) : map(n), format(format), min_color(0), max_color(0), nan_color(0), min(flimits::quiet_NaN()), max(flimits::quiet_NaN()) { + auto g = interpolate(n, vals, gray); + for (size_t i = 0; i < n; ++i) { + uint8_t x = uint8_t(g[i] * 0xff); + map[i] = format_color(format, x, x, x); + } + min_color = map[0]; + max_color = map[n - 1]; +} +Colormap::Colormap(const size_t n, const vector& vals, const vector& red, const vector& green, const vector& blue, uint32_t format) : map(n), format(format), min_color(0), max_color(0), nan_color(0), min(flimits::quiet_NaN()), max(flimits::quiet_NaN()) { + auto r = interpolate(n, vals, red); + auto g = interpolate(n, vals, green); + auto b = interpolate(n, vals, blue); + for (size_t i = 0; i < n; ++i) { + map[i] = format_color(format, uint8_t(r[i] * 0xff), uint8_t(g[i] * 0xff), uint8_t(b[i] * 0xff)); + } + min_color = map[0]; + max_color = map[n - 1]; +} +Colormap::Colormap(const size_t n, const vector> &red, const vector> &green, const vector> &blue, uint32_t format) : map(n), format(format), min_color(0), max_color(0), nan_color(0), min(flimits::quiet_NaN()), max(flimits::quiet_NaN()) { + auto r = interpolate(n, red); + auto g = interpolate(n, green); + auto b = interpolate(n, blue); + for (size_t i = 0; i < n; ++i) { + map[i] = format_color(format, uint8_t(r[i] * 0xff), uint8_t(g[i] * 0xff), uint8_t(b[i] * 0xff)); + } + min_color = map[0]; + max_color = map[n - 1]; +} + +size_t Colormap::size() const { + return map.size(); +} + +void Colormap::reformat(uint32_t new_format) { + if(new_format != format) { + Colormap tmp(*this); + tmp.format = new_format; + tmp.min_color = reformat_color(min_color, format, tmp.format); + tmp.max_color = reformat_color(max_color, format, tmp.format); + tmp.nan_color = reformat_color(nan_color, format, tmp.format); + for (uint32_t& c : tmp.map) { + c = reformat_color(c, format, tmp.format); + } + swap(tmp); + } +} + +uint32_t Colormap::apply(float val, float min, float max) const { + if (isnan(min)) { + min = this->min; + if (isnan(min)) + min = 0.0f; + } + if (isnan(max)) { + max = this->max; + if (isnan(max)) + max = 1.0f; + } + float scale = float(map.size()) / (max - min); + return _apply(val, min, max, scale); +} + +void Colormap::apply(float val, uint8_t &r, uint8_t &g, uint8_t &b, float min, float max) const { + get_rgb(apply(val,min,max),format,r,g,b); +} +void Colormap::apply(float val, uint8_t &r, uint8_t &g, uint8_t &b, uint8_t &a, float min, float max) const { + get_rgba(apply(val,min,max),format,r,g,b,a); +} + + +const int Colormap::min_idx = -1; +const int Colormap::max_idx = -2; +const int Colormap::nan_idx = -3; + +uint32_t* Colormap::data() { + return map.data(); +} + +const uint32_t* Colormap::data() const { + return map.data(); +} + +const uint32_t& Colormap::get(int idx) const { + if(idx == min_idx) return min_color; + else if(idx == max_idx) return max_color; + else if(idx == nan_idx) return nan_color; + else return map[idx]; +} +void Colormap::get(int idx, uint8_t& r, uint8_t& g, uint8_t& b) const { + Colormap::get_rgb(get(idx),format,r,g,b); +} +void Colormap::get(int idx, uint8_t& r, uint8_t& g, uint8_t& b, uint8_t& a) const { + Colormap::get_rgba(get(idx),format,r,g,b,a); +} + +void Colormap::set(int idx, uint8_t r, uint8_t g, uint8_t b) { + uint32_t a = ((format >> 8) & 0xff); + set(idx,r,g,b,a); +} + +void Colormap::set(int idx, uint8_t r, uint8_t g, uint8_t b, uint8_t a) { + if(idx == min_idx) min_color = format_color(format,r,g,b,a); + else if(idx == max_idx) max_color = format_color(format,r,g,b,a); + else if(idx == nan_idx) nan_color = format_color(format,r,g,b,a); + else map[idx] = format_color(format,r,g,b,a); +} + +Colormaps::Colormaps(const std::string& filename, uint32_t format, size_t n) { + parse(filename, format, n); + ////grayscale: + //float zo[] = { 0.0f, 1.0f }; + //gray = Colormap(n, vec(zo), vec(zo), format); + //add("gray",gray); + ////jet: + //float jv[] = { 0.000, 0.125, 0.375, 0.625, 0.875, 1.000 }; + //float jr[] = { 0, 0, 0, 1, 1, 0.5 }; + //float jg[] = { 0, 0, 1, 1, 0, 0 }; + //float jb[] = { 0.5, 1, 1, 0, 0, 0 }; + //jet = Colormap(n, vec(jv), vec(jr), vec(jg), vec(jb), format); + //add("jet",jet); + ////hsv: + //float hv[] = { 0, 1. / 6, 2. / 6, 3. / 6, 4. / 6, 5. / 6, 1 }; + //float hr[] = { 1, 1, 0, 0, 0, 1, 1 }; + //float hg[] = { 0, 1, 1, 1, 0, 0, 0 }; + //float hb[] = { 0, 0, 0, 1, 1, 1, 0 }; + //hsv = Colormap(n, vec(hv), vec(hr), vec(hg), vec(hb), format); + //add("hsv", hsv); +} + +static bool parse_name(std::istream& in, std::string& name, char delim) { + name = ""; + std::istream::sentry s(in); + if (s) while (in.good()) { + char c = in.get(); + if (std::isspace(c,in.getloc())) continue; //skip space + if (c == delim) return true; + else name += c; + } + return false; +} + +static bool parse_list(std::istream& in, std::vector& vals) { + vals.clear(); + std::istream::sentry s(in); + float v; + bool open = false; + if (s) while (in.good()) { + char c = in.get(); + if (std::isspace(c, in.getloc())) continue; + if (!open && c == '{') { + open = true; + in >> v; + vals.push_back(v); + } + else if (open && c == ',') { + //number comes next + in >> v; + vals.push_back(v); + } + else if (open && c == '}') { + //end list + return true; + } + else { + //syntax error + return false; + } + } + return false; +} + +static bool parse_pair(std::istream& in, std::pair& p) { + std::istream::sentry s(in); + bool open=false; + int done = 0; + if (s) while (in.good()) { + char c = in.get(); + if (std::isspace(c, in.getloc())) continue; //ignore whitespace + if (!open && c == '(') { + open = true; + in >> p.first; + done = 1; + } + else if (open && done == 1 && c == ',') { + in >> p.second; + done = 2; + } + else if (open && done == 2 && c == ')') { + return true; + } + else return false; + } + return false; +} + +static bool parse_list(std::istream& in, std::vector>& vals) { + vals.clear(); + std::istream::sentry s(in); + std::pair val; + bool open = false; + if (s) while (in.good()) { + char c = in.get(); + if (std::isspace(c, in.getloc())) continue; //ignore spaces + if (!open && c == '{') { + open = true; + if (!parse_pair(in, val)) return false; + vals.push_back(val); + } + else if (open && c == ',') { + if (!parse_pair(in, val)) return false; + vals.push_back(val); + } + else if (open && c == '}') { + return true; + } + else { + return false; + } + } + return false; +} + +void Colormaps::parse(const std::string& filename, uint32_t format, size_t n) { + ifstream in(filename); + while (true) { + std::string name, cname; + if (!parse_name(in, name, ':')) return; + if (!parse_name(in, cname, '=')) return; + if (cname == "value") { + //we're in value, red, green, blue mode + std::vector vals, red, green, blue; + if (!parse_list(in, vals)) return; + + if (!parse_name(in, cname, '=')) return; + if (cname != "red") return; + if (!parse_list(in, red)) return; + + if (!parse_name(in, cname, '=')) return; + if (cname != "green") return; + if (!parse_list(in, green)) return; + + if (!parse_name(in, cname, '=')) return; + if (cname != "blue") return; + if (!parse_list(in, blue)) return; + + add(name, Colormap(n, vals, red, green, blue, format)); + } + else if (cname == "red") { + //we're in pairs mode + std::vector> red, green, blue; + if (!parse_list(in, red)) return; + + if (!parse_name(in, cname, '=')) return; + if (cname != "green") return; + if (!parse_list(in, green)) return; + + if (!parse_name(in, cname, '=')) return; + if (cname != "blue") return; + if (!parse_list(in, blue)) return; + + add(name, Colormap(n, red, green, blue, format)); + } + else { + return; + } + } +} + +bool Colormaps::has(const string& name) const { + return (maps.find(name) != maps.end()); +} + +const Colormap& Colormaps::get(const string& name) const { + try { + return maps.at(name); + } catch(std::out_of_range&) { + throw std::runtime_error("Colormap not found: " + name); + } +} + +Colormap& Colormaps::get(const string& name) { + try { + return maps.at(name); + } catch(std::out_of_range&) { + throw std::runtime_error("Colormap not found: " + name); + } +} + +Colormap& Colormaps::operator[](const string& name) { + return maps[name]; +} + +Colormap& Colormaps::operator[](string&& name) { + return maps[move(name)]; +} + +Colormap& Colormaps::add(const string& name, const Colormap& cmap) { + if (name == "gray") gray = cmap; + else if (name == "jet") jet = cmap; + else if (name == "hsv") hsv = cmap; + return maps[name] = cmap; +} + +Colormap& Colormaps::add(const string& name, Colormap&& cmap) { + if (name == "gray") gray = cmap; + else if (name == "jet") jet = cmap; + else if (name == "hsv") hsv = cmap; + return maps[name] = std::move(cmap); +} diff --git a/colormaps.hpp b/colormaps.hpp new file mode 100644 index 0000000..1fb90d9 --- /dev/null +++ b/colormaps.hpp @@ -0,0 +1,220 @@ +/********** + * Copyright 2014 Samuel Bear Powell + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +\**********/ +#pragma once +#ifndef COLORMAPS_HPP +#define COLORMAPS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +typedef std::numeric_limits flimits; + +#ifdef MSVC_ISNAN +int isnan(double x); +int isnan(float x); +#endif +//A map that translates floats to uint32s + +struct Colormap { +public: + static uint32_t make_format(int red_byte=0, int green_byte=1, int blue_byte=2, int alpha_byte=3, uint8_t alpha_value=0xff); + static uint32_t format_color(uint32_t format, uint8_t r, uint8_t g, uint8_t b); + static uint32_t format_color(uint32_t format, uint8_t r, uint8_t g, uint8_t b, uint8_t a); + static void get_rgb(uint32_t color, uint32_t format, uint8_t& r, uint8_t& g, uint8_t& b); + static void get_rgba(uint32_t color, uint32_t format, uint8_t& r, uint8_t& g, uint8_t& b, uint8_t& a); + static uint32_t reformat_color(uint32_t color, uint32_t old_format, uint32_t new_format); + static const int min_idx, max_idx, nan_idx; +protected: + uint32_t _apply(float val, float min, float max, float scale) const; +public: + vector map; + uint32_t format; + uint32_t min_color, max_color, nan_color; + float min, max; + + + //constructors: + Colormap(); + Colormap(const Colormap& cm); //copy construct + Colormap& operator=(const Colormap& cm); //copy assign + void swap(Colormap& cm); + Colormap(Colormap&& cm); //move + Colormap& operator=(Colormap&& cm); //move assign + + size_t size() const; + + //copy each member: + Colormap(const vector& map, uint32_t format, uint32_t min_color = 0, uint32_t max_color = 0, uint32_t nan_color = 0, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()); + //interpolate the map: + Colormap(const size_t n, const vector& vals, const vector& gray, uint32_t format); + Colormap(const size_t n, const vector& vals, const vector& red, const vector& green, const vector& blue, uint32_t format); + Colormap(const size_t n, const vector> &red, const vector> &green, const vector> &blue, uint32_t format); + + //apply the map to a float: + //if min, max = NAN, then we use the member min, max; if member min, max = NAN, then we use 0,1 + uint32_t apply(float val, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()) const; + void apply(float val, uint8_t &r, uint8_t &g, uint8_t &b, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()) const; + void apply(float val, uint8_t &r, uint8_t &g, uint8_t &b, uint8_t &a, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()) const; + uint32_t operator()(float val, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()) const { + return apply(val, min, max); + } + uint32_t operator[](float val) const { + return apply(val); + } + + uint32_t* data(); + const uint32_t* data() const; + const uint32_t& get(int idx) const; + void get(int idx, uint8_t& r, uint8_t& g, uint8_t& b) const; + void get(int idx, uint8_t& r, uint8_t& g, uint8_t& b, uint8_t& a) const; + + void set(int idx, uint8_t r, uint8_t g, uint8_t b); + void set(int idx, uint8_t r, uint8_t g, uint8_t b, uint8_t a); + + void reformat(uint32_t new_format); + + //apply to an iterable dataset + //if min, max are not specified, then we use member min, max; if member min, max = NAN, then we find the min, max of the dataset + template + void apply(InputIt first, InputIt last, OutputIt out_first, float min = flimits::quiet_NaN(), float max = flimits::quiet_NaN()) const { + //get min and max set up: + bool find_min = false, find_max = false; + if (isnan(min)) { + min = this->min; + if (isnan(min)) { + min = flimits::infinity(); + find_min = true; + } + } + if (isnan(max)) { + max = this->max; + if (isnan(max)) { + max = -flimits::infinity(); + find_max = true; + } + } + if (find_min || find_max) { + InputIt it = first; + while (it != last) { + float val = *it++; + if (find_min && val < min) min = val; + if (find_max && val > max) max = val; + } + } + //apply the colormap + float scale = float(map.size()) / (max - min); + while (first != last) { + *out_first++ = _apply(*first++, min, max, scale); + } + } + //apply to iterable collections (using std::begin and std::end) + template + void apply(const InputT& in, OutputT& out, const float& min = 0.0f, const float& max = 1.0f) const { + apply(std::begin(in), std::end(in), std::begin(out), min, max); + } +}; + +struct Colormaps { + Colormap gray, hsv, jet; + unordered_map maps; + Colormaps(const std::string& filename, uint32_t format, size_t n = 256); + + void parse(const std::string& filename, uint32_t format, size_t n=256); + + bool has(const string& name) const; + const Colormap& get(const string& name) const; + Colormap& get(const string& name); + Colormap& operator[](const string& name); + Colormap& operator[](string&& name); + + Colormap& add(const string& name, const Colormap& cmap); + Colormap& add(const string& name, Colormap&& cmap); + + unordered_map::iterator begin() { + return maps.begin(); + } + unordered_map::iterator end() { + return maps.end(); + } + unordered_map::const_iterator begin() const { + return maps.begin(); + } + unordered_map::const_iterator end() const { + return maps.end(); + } +}; + +template +vector interpolate(size_t n, const vector& positions, const vector values) { + vector out(n); + float step = 1.0f / (n - 1); + if (values.size() != positions.size()) + throw runtime_error("positions and values must have same size!"); + size_t left_idx = 0, end_idx = positions.size(); + size_t right_idx = (left_idx + 1 == end_idx) ? left_idx : left_idx + 1; + + float left_pos = positions[left_idx], right_pos = positions[right_idx]; + value_t left_val = values[left_idx], right_val = values[right_idx]; + + for (size_t i = 0; i < n; ++i) { + float pos = i*step; + //increment the left/right points until they straddle the current position + while (right_pos <= pos && left_idx + 1 != end_idx) { + ++left_idx; + right_idx = (left_idx + 1 == end_idx) ? left_idx : left_idx + 1; + left_pos = positions[left_idx], right_pos = positions[right_idx]; + left_val = values[left_idx], right_val = values[right_idx]; + } + //take right or left values if below or above the position: + if (pos >= right_pos) out[i] = right_val; + else if (pos <= left_pos) out[i] = left_val; + //otherwise linearly interpolate between the values: + else out[i] = left_val + (right_val - left_val)*((pos - left_pos) / (right_pos - left_pos)); + } + return out; +} + +template +vector interpolate(size_t n, const vector> &pts) { + vector out(n); + float step = 1.0f / (n - 1); + auto left = pts.begin(), end = pts.end(); + auto right = left + 1 == end ? left : left + 1; + float left_pos = left->first, right_pos = right->first; + value_t left_val = left->second, right_val = right->second; + for (size_t i = 0; i < n; ++i) { + float pos = i*step; + while (right_pos <= pos && left + 1 != end) { + ++left; + right = left + 1 == end ? left : left + 1; + left_pos = left->first, left_val = left->second; + right_pos = right->first, right_val = right->second; + } + if (pos >= right_pos) out[i] = right_val; + else if (pos <= left_pos) out[i] = left_val; + else out[i] = left_val + (right_val - left_val)*((pos - left_pos) / (right_pos - left_pos)); + } + return out; +} + +#endif // COLORMAPS_HPP diff --git a/polarization.cpp b/polarization.cpp new file mode 100644 index 0000000..a2abe45 --- /dev/null +++ b/polarization.cpp @@ -0,0 +1,503 @@ +/********** + * Copyright 2014 Samuel Bear Powell + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +\**********/ +#include "polarization.hpp" +#include +#include +#include + +const uint8_t Polarization::default_pattern = (PIX0(0, 0) | PIX1(1, 1) | PIX2(0, 1) | PIX3(1, 0)); + +void Polarization::trans(__m128 mat[4]) { + _MM_TRANSPOSE4_PS(mat[0], mat[1], mat[2], mat[3]); +} + +void Polarization::inv(const __m128 mat[4], __m128 out[4]) { + //adapted from Intel AP-928 "Streaming SIMD Extensions - Inverse of a 4x4 Matrix" + __m128 minor0, minor1, minor2, minor3; + __m128 row0, row1, row2, row3; + __m128 det, tmp1; + + row0 = mat[0]; + row1 = mat[1]; + row2 = mat[2]; + row3 = mat[3]; + //transpose: + _MM_TRANSPOSE4_PS(row0, row1, row2, row3); + //Compute cofactors: + tmp1 = _mm_mul_ps(row2, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_mul_ps(row1, tmp1); + minor1 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0); + minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1); + minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E); + + tmp1 = _mm_mul_ps(row1, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0); + minor3 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3); + minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E); + + tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + row2 = _mm_shuffle_ps(row2, row2, 0x4E); + minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0); + minor2 = _mm_mul_ps(row0, tmp1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2); + minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E); + + tmp1 = _mm_mul_ps(row0, row1); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1)); + + tmp1 = _mm_mul_ps(row0, row3); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1)); + minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1); + minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1)); + + tmp1 = _mm_mul_ps(row0, row2); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1); + minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1); + minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1)); + tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E); + minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1)); + minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3); + //compute determinant: + det = _mm_mul_ps(row0, minor0); + det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det); + det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det); + tmp1 = _mm_rcp_ss(det); + det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1))); + det = _mm_shuffle_ps(det, det, 0x00); + //store: + out[0] = _mm_mul_ps(det, minor0); + out[1] = _mm_mul_ps(det, minor1); + out[2] = _mm_mul_ps(det, minor2); + out[3] = _mm_mul_ps(det, minor3); +} + +//perform a dot product of a row-vector and a matrix +//the matrix is in column-major order!! +__m128 Polarization::dot(const __m128 vec, const __m128 mat[4]) { + //expand vec to columns + __m128 vec0 = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 vec1 = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 vec2 = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(2, 2, 2, 2)); + __m128 vec3 = _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 3, 3, 3)); + //element-wise multiply each column of vec with each column of mat + __m128 prod0 = _mm_mul_ps(vec0, mat[0]); + __m128 prod1 = _mm_mul_ps(vec1, mat[1]); + __m128 prod2 = _mm_mul_ps(vec2, mat[2]); + __m128 prod3 = _mm_mul_ps(vec3, mat[3]); + //sum down each column + return _mm_add_ps(_mm_add_ps(prod0, prod1), _mm_add_ps(prod2, prod3)); +} +//perform a dot product of a matrix with a column vector +//the matrix is in row-major order!! +__m128 Polarization::dot(const __m128 mat[4], const __m128 vec) { + //element-wise product of vector with each row + __m128 prod0 = _mm_mul_ps(vec, mat[0]); + __m128 prod1 = _mm_mul_ps(vec, mat[1]); + __m128 prod2 = _mm_mul_ps(vec, mat[2]); + __m128 prod3 = _mm_mul_ps(vec, mat[3]); + //sum each product using horizontal-add + return _mm_hadd_ps(_mm_hadd_ps(prod0, prod1), _mm_hadd_ps(prod2, prod3)); +} + +float Polarization::dot(const __m128 vec0, const __m128 vec1) { + __m128 prod = _mm_mul_ps(vec0, vec1); + return prod.m128_f32[0] + prod.m128_f32[1] + prod.m128_f32[2] + prod.m128_f32[3]; +} + +void Polarization::unpack_superpixels(const size_t rows, const size_t cols, const __m128* packed, __m128* out, uint8_t pattern) { + size_t rows_2 = rows / 2, cols_2 = cols / 2; + int r, c, rx2, cx2; + #pragma omp parallel for private(r,c,rx2,cx2) + for (r = 0; r < rows_2; ++r) { + for (c = 0; c < cols_2; ++c) { + rx2 = r << 1; cx2 = c << 1; + __m128 p = packed[IDX(r, c, cols_2)]; + out[IDX(rx2 | PATR(pattern, 0), cx2 | PATC(pattern, 0), cols)] = _mm_setr_ps(p.m128_f32[0], 0, 0, 0); + out[IDX(rx2 | PATR(pattern, 1), cx2 | PATC(pattern, 1), cols)] = _mm_setr_ps(0, p.m128_f32[1], 0, 0); + out[IDX(rx2 | PATR(pattern, 2), cx2 | PATC(pattern, 2), cols)] = _mm_setr_ps(0, 0, p.m128_f32[2], 0); + out[IDX(rx2 | PATR(pattern, 3), cx2 | PATC(pattern, 3), cols)] = _mm_setr_ps(0, 0, 0, p.m128_f32[3]); + } + } +} + +void Polarization::mask_low_high(const size_t n, const __m128* raw, float low, float high, __m128* out) { + __m128 low_vec = _mm_set1_ps(low); + __m128 high_vec = _mm_set1_ps(high); + __m128 nan_vec = _mm_set1_ps(std::numeric_limits::quiet_NaN()); + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + __m128 val = *raw++; + //mask = (val < low) | (val > high) + __m128 mask = _mm_or_ps(_mm_cmplt_ps(val, low_vec), _mm_cmpgt_ps(val, high_vec)); + //val = (~mask & val) | (mask & NAN) + val = _mm_or_ps(_mm_andnot_ps(mask, val), _mm_and_ps(mask, nan_vec)); + *out++ = val; + } +} +void Polarization::mask_low(const size_t n, const __m128* raw, float low, __m128* out) { + __m128 low_vec = _mm_set1_ps(low); + __m128 nan_vec = _mm_set1_ps(std::numeric_limits::quiet_NaN()); + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + __m128 val = *raw++; + //mask = (val < low) + __m128 mask = _mm_cmplt_ps(val, low_vec); + //val = (~mask & val) | (mask & NAN) + val = _mm_or_ps(_mm_andnot_ps(mask, val), _mm_and_ps(mask, nan_vec)); + *out++ = val; + } +} +void Polarization::mask_high(const size_t n, const __m128* raw, float high, __m128* out) { + __m128 high_vec = _mm_set1_ps(high); + __m128 nan_vec = _mm_set1_ps(std::numeric_limits::quiet_NaN()); + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + __m128 val = *raw++; + //mask = (val > high) + __m128 mask = _mm_cmpgt_ps(val, high_vec); + //val = (~mask & val) | (mask & NAN) + val = _mm_or_ps(_mm_andnot_ps(mask, val), _mm_and_ps(mask, nan_vec)); + *out++ = val; + } +} + +void Polarization::calibrate_matrix(const size_t n, const __m128* raw, const __m128* darks, const __m128* gains, __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(&gains[i], _mm_sub_ps(raw[i], darks[i])); + } +} + +void Polarization::calibrate_matrix2(const size_t n, const __m128* raw, const __m128* darks, const __m128* gains, __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(_mm_sub_ps(raw[i], darks[i]), &gains[i]); + } +} + +void Polarization::filter(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt, const edge_mode::t mode) { + switch (mode) { + case edge_mode::ZERO: + return filter_zero(rows, cols, in, out, filt_rows, filt_cols, filt); + case edge_mode::WRAP: + return filter_wrap(rows, cols, in, out, filt_rows, filt_cols, filt); + case edge_mode::REFLECT: + return filter_reflect(rows, cols, in, out, filt_rows, filt_cols, filt); + } +} + +void Polarization::filter(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_size, const __m128* filt, const edge_mode::t mode) { + switch (mode) { + case edge_mode::ZERO: + return filter_zero(rows, cols, in, out, filt_size, filt); + case edge_mode::WRAP: + return filter_wrap(rows, cols, in, out, filt_size, filt); + case edge_mode::REFLECT: + return filter_reflect(rows, cols, in, out, filt_size, filt); + } +} + +void Polarization::filter_zero(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt) +{ + int roff = filt_rows / 2; + int coff = filt_cols / 2; + int r, c, fr, fc, rx, cx; + __m128 sum; + #pragma omp parallel for private(r,c,sum,fr,fc,rx,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fr = 0; fr < filt_rows; ++fr) { + for (fc = 0; fc < filt_cols; ++fc) { + rx = r + fr - roff; + cx = c + fc - coff; + if (rx < 0 || cx < 0 || rx >= rows || cx >= cols) + continue; //sum += 0 + else //sum += in[rx,cx]*filt[fr,fc]; + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx,cx,cols)],filt[IDX(fr,fc,filt_cols)])); + } + } + out[IDX(r, c, cols)] = sum; + } + } +} + +void Polarization::filter_zero(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_size, const __m128* filt) +{ + int off = filt_size / 2; + int r, c, fi, rx, cx; + __m128 sum; + //apply to rows + #pragma omp parallel for private(r,c,sum,fi,rx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fi = 0; fi < filt_size; ++fi) { + rx = r + fi - off; + if(rx < 0 || rx >= rows) + continue; + else + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx,c,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } + //apply to columns: + #pragma omp parallel for private(r,c,sum,fi,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = out[IDX(r,c,cols)]; + for (fi = 0; fi < filt_size; ++fi) { + cx = c + fi - off; + if(cx < 0 || cx >= cols) + continue; + else + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(r,cx,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } +} + +void Polarization::filter_wrap(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt) +{ + int roff = filt_rows / 2; + int coff = filt_cols / 2; + int r, c, fr, fc, rx, cx; + __m128 sum; + #pragma omp parallel for private(r,c,sum,fr,fc,rx,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fr = 0; fr < filt_rows; ++fr) { + for (fc = 0; fc < filt_cols; ++fc) { + rx = WRAP(r + fr - roff, rows); + cx = WRAP(c + fc - coff, cols); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx, cx, cols)], filt[IDX(fr, fc, filt_cols)])); + } + } + out[IDX(r, c, cols)] = sum; + } + } +} + +void Polarization::filter_wrap(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_size, const __m128* filt) +{ + int off = filt_size / 2; + int r, c, fi, rx, cx; + __m128 sum; + //apply to rows + #pragma omp parallel for private(r,c,sum,fi,rx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fi = 0; fi < filt_size; ++fi) { + rx = WRAP(r + fi - off, rows); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx,c,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } + //apply to columns: + #pragma omp parallel for private(r,c,sum,fi,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = out[IDX(r,c,cols)]; + for (fi = 0; fi < filt_size; ++fi) { + cx = WRAP(c + fi - off, cols); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(r,cx,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } +} + +void Polarization::filter_reflect(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt) +{ + int roff = filt_rows / 2; + int coff = filt_cols / 2; + int r, c, fr, fc, rx, cx; + __m128 sum; + #pragma omp parallel for private(r,c,sum,fr,fc,rx,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fr = 0; fr < filt_rows; ++fr) { + for (fc = 0; fc < filt_cols; ++fc) { + rx = REFLECT(r + fr - roff, rows); + cx = REFLECT(c + fc - coff, cols); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx, cx, cols)], filt[IDX(fr, fc, filt_cols)])); + } + } + out[IDX(r, c, cols)] = sum; + } + } +} + +void Polarization::filter_reflect(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_size, const __m128* filt) +{ + int off = filt_size / 2; + int r, c, fi, rx, cx; + __m128 sum; + //apply to rows + #pragma omp parallel for private(r,c,sum,fi,rx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = _mm_set1_ps(0); + for (fi = 0; fi < filt_size; ++fi) { + rx = REFLECT(r + fi - off, rows); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(rx,c,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } + //apply to columns: + #pragma omp parallel for private(r,c,sum,fi,cx) + for (r = 0; r < rows; ++r) { + for (c = 0; c < cols; ++c) { + sum = out[IDX(r,c,cols)]; + for (fi = 0; fi < filt_size; ++fi) { + cx = REFLECT(c + fi - off, cols); + sum = _mm_add_ps(sum, _mm_mul_ps(in[IDX(r,cx,cols)],filt[fi])); + } + out[IDX(r,c,cols)] = sum; + } + } +} + +//does dot(R[i], img[i]) where R[i] is row-major +void Polarization::stokes(const size_t n, const __m128* img, const __m128* R, __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(&R[i], img[i]); + } +} +//does dot(R, img[i]) where R is row-major +void Polarization::stokesR(const size_t n, const __m128* img, const __m128 R[4], __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(R, img[i]); + } +} +//does dot(img[i], R[i]) where R[i] is column-major +void Polarization::stokes2(const size_t n, const __m128* img, const __m128* R, __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(img[i], &R[i]); + } +} +//does dot(img[i], R) where R is column-major +void Polarization::stokes2R(const size_t n, const __m128* img, const __m128 R[4], __m128* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = dot(img[i], R); + } +} +//assumes 0,90,45,135 pixel pattern: +void Polarization::stokes(const size_t n, const __m128* img, __m128* out) { + __m128 R[] = { + _mm_setr_ps(0.5, 1.0, 0.0, 0.0), + _mm_setr_ps(0.5, -1.0, 0.0, 0.0), + _mm_setr_ps(0.5, 0.0, 1.0, 0.0), + _mm_setr_ps(0.5, 0.0, -1.0, 0.0) + }; + stokes2R(n, img, R, out); +} + +void Polarization::element(const size_t param, const size_t n, const __m128* simg, float* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = simg[i].m128_f32[param]; + } +} + +//degree of polarization: hypot(s1,s2,s3)/s0 +void Polarization::dop(const size_t n, const __m128* simg, float* out) { + __m128 tmp; + float sum; + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + //square each element + tmp = _mm_mul_ps(simg[i], simg[i]); + //(s1^2+s2^2+s3^2)/s0^2 + tmp.m128_f32[0] = (tmp.m128_f32[1] + tmp.m128_f32[2] + tmp.m128_f32[3])/tmp.m128_f32[0]; + out[i] = _mm_sqrt_ss(tmp).m128_f32[0]; + } +} + +//degree of linear polarization: hypot(s1,s2)/s0 +void Polarization::dolp(const size_t n, const __m128* simg, float* out) { + __m128 tmp; + float sum; + #pragma omp parallel for private(tmp, sum) + for (int i = 0; i < n; ++i) { + //square each element + tmp = _mm_mul_ps(simg[i], simg[i]); + //(s1^2 + s2^2)/s0^2 + tmp.m128_f32[0] = (tmp.m128_f32[1] + tmp.m128_f32[2]) / tmp.m128_f32[0]; + out[i] = _mm_sqrt_ss(tmp).m128_f32[0]; + } +} + +//degree of circular polarization: abs(s3)/s0 +void Polarization::docp(const size_t n, const __m128* simg, float* out){ + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = fabs(simg[i].m128_f32[3] / simg[i].m128_f32[0]); + } +} + +//angle of polarization: 0.5*atan(s2/s1) +void Polarization::aop(const size_t n, const __m128* simg, float* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = 0.5*atan2f(simg[i].m128_f32[2], simg[i].m128_f32[1]); + } +} + +//2x angle of polarization: atan(s2/s1) +void Polarization::aopx2(const size_t n, const __m128* simg, float* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = atan2f(simg[i].m128_f32[2], simg[i].m128_f32[1]); + } +} + +//ellipticity angle: 0.5*asin(s3/s0) +void Polarization::ella(const size_t n, const __m128* simg, float* out) { + #pragma omp parallel for + for (int i = 0; i < n; ++i) { + out[i] = 0.5*asinf(simg[i].m128_f32[3] / simg[i].m128_f32[0]); + } +} diff --git a/polarization.hpp b/polarization.hpp new file mode 100644 index 0000000..028f0f5 --- /dev/null +++ b/polarization.hpp @@ -0,0 +1,207 @@ +/********** + * Copyright 2014 Samuel Bear Powell + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +\**********/ +#ifndef POLARIZATION_HPP +#define POLARIZATION_HPP + +#include //SSE1,2 +#include //SSE4 +#include + +//pattern encoding for 2x2 super-pixels +//PIXn(r,c) specifies that pixel n (0,1,2,3) belongs in (row, column) = (r,c) of the super-pixel. r, c must be either 0 or 1 +#define PIX0(r,c) (((r)&1) | (((c)&1) << 1)) +#define PIXN(r,c,n) (PIX0(r,c) << (2*(n))) +#define PIX1(r,c) PIXN(r,c,1) +#define PIX2(r,c) PIXN(r,c,2) +#define PIX3(r,c) PIXN(r,c,3) + +//pattern decoding for 2x2 super-pixels +//PATR(pattern,n) yields 0 or 1 for which row pixel n belongs to as specified by the pattern +//PATC(pattern,n) yields the column. +#define PATR(p,n) (((p) >> (2*(n))) & 1) +#define PATC(p,n) (((p) >> (2*(n)+1)) & 1) + +//compute an index for a row-major storage matrix +#define IDX(r,c,cols) ((r)*(cols)+(c)) +//wrap an index 'x' from 0 to max +#define WRAP(x,max) ((x) < 0 ? ((max) - ((max) % -(x))) : ((max) % (x))) +//reflect an index x between 0 to max +#define REFLECT(x,max) (WRAP((x),2*(max)) > (max) ? 2*(max)-WRAP((x),2*(max)) : WRAP((x),2*(max))) + + +//#define DEFAULT_PATTERN (PIX0(0, 0) | PIX1(1, 1) | PIX2(0, 1) | PIX3(1, 0)) + +namespace Polarization { + + extern const uint8_t default_pattern; + + //transpose a 4x4 matrix + void trans(__m128 mat[4]); + + //invert a 4x4 matrix + void inv(const __m128 mat[4], __m128 out[4]); + + //perform a dot product of a row-vector and a matrix + //the matrix is in column-major order!! + __m128 dot(const __m128 vec, const __m128 mat[4]); + + //perform a dot product of a matrix with a column vector + //the matrix is in row-major order!! + __m128 dot(const __m128 mat[4], const __m128 vec); + + float dot(const __m128 vec0, const __m128 vec1); + + //packs superpixels into SSE registers according to pattern + //the pattern is specified by P0(r,c) to P3(r,c) macros + //P0(r,c) means that pixel 0 of the SSE register appears in row,col = r,c of the 2x2 super pixel. + template + void pack_superpixels(const size_t rows, const size_t cols, const raw_type* raw, __m128* out, uint8_t pattern = default_pattern) { + size_t rows_2 = rows / 2, cols_2 = cols / 2; + int r,c,rx2,cx2; + #pragma omp parallel for private(r,c,rx2,cx2) + for (r = 0; r < rows_2; ++r) { + for (c = 0; c < cols_2; ++c) { + rx2 = r << 1, cx2 = c << 1; + out[IDX(r,c,cols_2)] = + _mm_setr_ps( + float(raw[IDX(rx2 | PATR(pattern, 0), cx2 | PATC(pattern, 0), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 1), cx2 | PATC(pattern, 1), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 2), cx2 | PATC(pattern, 2), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 3), cx2 | PATC(pattern, 2), cols)]) + ); + } + } + } + + void unpack_superpixels(const size_t rows, const size_t cols, const __m128* packed, __m128* out, uint8_t pattern = default_pattern); + + void mask_low_high(const size_t n, const __m128* raw, float low, float high, __m128* out); + void mask_low(const size_t n, const __m128* raw, float low, __m128* out); + void mask_high(const size_t n, const __m128* raw, float high, __m128* out); + + //packs super-pixels and does dot(gains[i], raw[i] - darks[i]) where gains[i] is row-major + template + void calibrate_matrix(const size_t rows, const size_t cols, const raw_type* raw, const __m128* darks, const __m128* gains, __m128* out, uint8_t pattern = default_pattern) { + size_t rows_2 = rows / 2, cols_2 = cols / 2; + int r, c, rx2, cx2; + __m128 packed; + #pragma omp parallel for private(r,c,rx2,cx2,packed) + for (r = 0; r < rows_2; ++r) { + for (c = 0; c < cols_2; ++c) { + rx2 = r << 1, cx2 = c << 1; + packed = + _mm_setr_ps( + float(raw[IDX(rx2 | PATR(pattern, 0), cx2 | PATC(pattern, 0), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 1), cx2 | PATC(pattern, 1), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 2), cx2 | PATC(pattern, 2), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 3), cx2 | PATC(pattern, 2), cols)]) + ); + out[IDX(r, c, cols_2)] = dot(&gains[IDX(r, c, cols_2)], _mm_sub_ps(packed, darks[IDX(r, c, cols_2)])); + } + } + } + + //packs super-pixels does dot(raw[i] - darks[i], gains[i]) where gains[i] is column-major + template + void calibrate_matrix2(const size_t rows, const size_t cols, const raw_type* raw, const __m128* darks, const __m128* gains, __m128* out, uint8_t pattern = default_pattern) { + size_t rows_2 = rows / 2, cols_2 = cols / 2; + int r, c, rx2, cx2; + __m128 packed; + #pragma omp parallel for private(r,c,rx2,cx2,packed) + for (r = 0; r < rows_2; ++r) { + for (c = 0; c < cols_2; ++c) { + rx2 = r << 1, cx2 = c << 1; + packed = + _mm_setr_ps( + float(raw[IDX(rx2 | PATR(pattern, 0), cx2 | PATC(pattern, 0), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 1), cx2 | PATC(pattern, 1), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 2), cx2 | PATC(pattern, 2), cols)]), + float(raw[IDX(rx2 | PATR(pattern, 3), cx2 | PATC(pattern, 2), cols)]) + ); + out[IDX(r, c, cols_2)] = dot(_mm_sub_ps(packed, darks[IDX(r, c, cols_2)]), gains[IDX(r, c, cols_2)]); + } + } + } + + //does dot(gains[i], raw[i] - darks[i]) where gains[i] is row-major + void calibrate_matrix(const size_t n, const __m128* raw, const __m128* darks, const __m128* gains, __m128* out); + + //does dot(raw[i] - darks[i], gains[i]) where gains[i] is column-major + void calibrate_matrix2(const size_t n, const __m128* raw, const __m128* darks, const __m128* gains, __m128* out); + + struct edge_mode { + enum t { + ZERO, + WRAP, + REFLECT + }; + }; + + //2D filtering (with 2D kernel) + void filter(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt, const edge_mode::t mode); + void filter_zero(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt); + void filter_wrap(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt); + void filter_reflect(const size_t rows, const size_t cols, const __m128* in, __m128* out, const size_t filt_rows, const size_t filt_cols, const __m128* filt); + + //2D filtering (with 1D kernel applied on both axes) + void filter(const size_t rows, const size_t cols, const __m128 *in, __m128 *out, const size_t filt_size, const __m128 *filt, const edge_mode::t mode); + void filter_zero(const size_t rows, const size_t cols, const __m128 *in, __m128 *out, const size_t filt_size, const __m128 *filt); + void filter_wrap(const size_t rows, const size_t cols, const __m128 *in, __m128 *out, const size_t filt_size, const __m128 *filt); + void filter_reflect(const size_t rows, const size_t cols, const __m128 *in, __m128 *out, const size_t filt_size, const __m128 *filt); + + //does dot(R[i], img[i]) where R[i] is row-major + void stokes(const size_t n, const __m128* img, const __m128* R, __m128* out); + + //does dot(R, img[i]) where R is row-major + void stokesR(const size_t n, const __m128* img, const __m128 R[4], __m128* out); + + //does dot(img[i], R[i]) where R[i] is column-major + void stokes2(const size_t n, const __m128* img, const __m128* R, __m128* out); + + //does dot(img[i], R) where R is column-major + void stokes2R(const size_t n, const __m128* img, const __m128 R[4], __m128* out); + + //assumes 0,90,45,135 pixel pattern: + void stokes(const size_t n, const __m128* img, __m128* out); + + //extract a single element from the __m128 + void element(const size_t param, const size_t n, const __m128* simg, float* out); + + //degree of polarization: hypot(s1,s2,s3)/s0 + void dop(const size_t n, const __m128* simg, float* out); + + //degree of linear polarization: hypot(s1,s2)/s0 + void dolp(const size_t n, const __m128* simg, float* out); + + //degree of circular polarization: abs(s3)/s0 + void docp(const size_t n, const __m128* simg, float* out); + + //angle of polarization: 0.5*atan(s2/s1) + void aop(const size_t n, const __m128* simg, float* out); + + //2x angle of polarization: atan(s2/s1) + void aopx2(const size_t n, const __m128* simg, float* out); + + //ellipticity angle: 0.5*asin(s3/s0) + void ella(const size_t n, const __m128* simg, float* out); + + //TODO: + //decompose stokes vector? + +} + + +#endif // POLARIZATION_HPP