diff --git a/source.list b/source.list index df35cdd26..4a0b98477 100644 --- a/source.list +++ b/source.list @@ -909,6 +909,8 @@ script/api/script_window.cpp blitter/32bpp_anim.cpp blitter/32bpp_anim.hpp #if SSE +blitter/32bpp_anim_sse2.cpp +blitter/32bpp_anim_sse2.hpp blitter/32bpp_anim_sse4.cpp blitter/32bpp_anim_sse4.hpp #end diff --git a/src/blitter/32bpp_anim.cpp b/src/blitter/32bpp_anim.cpp index 532ad23cc..f380b968a 100644 --- a/src/blitter/32bpp_anim.cpp +++ b/src/blitter/32bpp_anim.cpp @@ -22,7 +22,7 @@ static FBlitter_32bppAnim iFBlitter_32bppAnim; Blitter_32bppAnim::~Blitter_32bppAnim() { - free(this->anim_buf); + free(this->anim_alloc); } template @@ -39,8 +39,7 @@ inline void Blitter_32bppAnim::Draw(const Blitter::BlitterParams *bp, ZoomLevel } Colour *dst = (Colour *)bp->dst + bp->top * bp->pitch + bp->left; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'bp->dst' into an 'anim_buf' offset below. - uint16 *anim = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_pitch + bp->left; + uint16 *anim = this->anim_buf + this->ScreenToAnimOffset((uint32 *)bp->dst) + bp->top * this->anim_buf_pitch + bp->left; const byte *remap = bp->remap; // store so we don't have to access it via bp everytime @@ -280,8 +279,7 @@ void Blitter_32bppAnim::DrawColourMappingRect(void *dst, int width, int height, } Colour *udst = (Colour *)dst; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'dst' into an 'anim_buf' offset below. - uint16 *anim = this->anim_buf + ((uint32 *)dst - (uint32 *)_screen.dst_ptr); + uint16 *anim = this->anim_buf + this->ScreenToAnimOffset((uint32 *)dst); if (pal == PALETTE_TO_TRANSPARENT) { do { @@ -319,8 +317,7 @@ void Blitter_32bppAnim::SetPixel(void *video, int x, int y, uint8 colour) /* Set the colour in the anim-buffer too, if we are rendering to the screen */ if (_screen_disable_anim) return; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below. - this->anim_buf[((uint32 *)video - (uint32 *)_screen.dst_ptr) + x + y * this->anim_buf_pitch] = colour | (DEFAULT_BRIGHTNESS << 8); + this->anim_buf[this->ScreenToAnimOffset((uint32 *)video) + x + y * this->anim_buf_pitch] = colour | (DEFAULT_BRIGHTNESS << 8); } void Blitter_32bppAnim::DrawRect(void *video, int width, int height, uint8 colour) @@ -332,8 +329,7 @@ void Blitter_32bppAnim::DrawRect(void *video, int width, int height, uint8 colou } Colour colour32 = LookupColourInPalette(colour); - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below. - uint16 *anim_line = ((uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf; + uint16 *anim_line = this->ScreenToAnimOffset((uint32 *)video) + this->anim_buf; do { Colour *dst = (Colour *)video; @@ -357,8 +353,7 @@ void Blitter_32bppAnim::CopyFromBuffer(void *video, const void *src, int width, assert(video >= _screen.dst_ptr && video <= (uint32 *)_screen.dst_ptr + _screen.width + _screen.height * _screen.pitch); Colour *dst = (Colour *)video; const uint32 *usrc = (const uint32 *)src; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below. - uint16 *anim_line = ((uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf; + uint16 *anim_line = this->ScreenToAnimOffset((uint32 *)video) + this->anim_buf; for (; height > 0; height--) { /* We need to keep those for palette animation. */ @@ -401,8 +396,7 @@ void Blitter_32bppAnim::CopyToBuffer(const void *video, void *dst, int width, in if (this->anim_buf == NULL) return; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below. - const uint16 *anim_line = ((const uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf; + const uint16 *anim_line = this->ScreenToAnimOffset((const uint32 *)video) + this->anim_buf; for (; height > 0; height--) { memcpy(udst, src, width * sizeof(uint32)); @@ -485,18 +479,22 @@ void Blitter_32bppAnim::PaletteAnimate(const Palette &palette) Colour *dst = (Colour *)_screen.dst_ptr; /* Let's walk the anim buffer and try to find the pixels */ + const int width = this->anim_buf_width; + const int pitch_offset = _screen.pitch - width; + const int anim_pitch_offset = this->anim_buf_pitch - width; for (int y = this->anim_buf_height; y != 0 ; y--) { - for (int x = this->anim_buf_width; x != 0 ; x--) { - uint colour = GB(*anim, 0, 8); + for (int x = width; x != 0 ; x--) { + uint16 value = *anim; + uint8 colour = GB(value, 0, 8); if (colour >= PALETTE_ANIM_START) { /* Update this pixel */ - *dst = this->AdjustBrightness(LookupColourInPalette(colour), GB(*anim, 8, 8)); + *dst = this->AdjustBrightness(LookupColourInPalette(colour), GB(value, 8, 8)); } dst++; anim++; } - dst += _screen.pitch - this->anim_buf_width; - anim += this->anim_buf_pitch - this->anim_buf_width; + dst += pitch_offset; + anim += anim_pitch_offset; } /* Make sure the backend redraws the whole screen */ @@ -510,13 +508,15 @@ Blitter::PaletteAnimation Blitter_32bppAnim::UsePaletteAnimation() void Blitter_32bppAnim::PostResize() { - if (_screen.width != this->anim_buf_width || _screen.height != this->anim_buf_height || - _screen.pitch != this->anim_buf_pitch) { + if (_screen.width != this->anim_buf_width || _screen.height != this->anim_buf_height) { /* The size of the screen changed; we can assume we can wipe all data from our buffer */ - free(this->anim_buf); + free(this->anim_alloc); this->anim_buf_width = _screen.width; this->anim_buf_height = _screen.height; - this->anim_buf_pitch = _screen.pitch; - this->anim_buf = CallocT(this->anim_buf_height * this->anim_buf_pitch); + this->anim_buf_pitch = (_screen.width + 7) & ~7; + this->anim_alloc = CallocT(this->anim_buf_pitch * this->anim_buf_height + 8); + + /* align buffer to next 16 byte boundary */ + this->anim_buf = reinterpret_cast((reinterpret_cast(this->anim_alloc) + 0xF) & (~0xF)); } } diff --git a/src/blitter/32bpp_anim.hpp b/src/blitter/32bpp_anim.hpp index 1b35c1766..da33ec95b 100644 --- a/src/blitter/32bpp_anim.hpp +++ b/src/blitter/32bpp_anim.hpp @@ -18,14 +18,16 @@ class Blitter_32bppAnim : public Blitter_32bppOptimized { protected: uint16 *anim_buf; ///< In this buffer we keep track of the 8bpp indexes so we can do palette animation + void *anim_alloc; ///< The raw allocated buffer, not necessarily aligned correctly int anim_buf_width; ///< The width of the animation buffer. int anim_buf_height; ///< The height of the animation buffer. - int anim_buf_pitch; ///< The pitch of the animation buffer. + int anim_buf_pitch; ///< The pitch of the animation buffer (width rounded up to 16 byte boundary). Palette palette; ///< The current palette. public: Blitter_32bppAnim() : anim_buf(NULL), + anim_alloc(NULL), anim_buf_width(0), anim_buf_height(0), anim_buf_pitch(0) @@ -58,6 +60,15 @@ public: return this->palette.palette[index]; } + inline int ScreenToAnimOffset(const uint32 *video) + { + int raw_offset = video - (const uint32 *)_screen.dst_ptr; + if (_screen.pitch == this->anim_buf_pitch) return raw_offset; + int lines = raw_offset / _screen.pitch; + int across = raw_offset % _screen.pitch; + return across + (lines * this->anim_buf_pitch); + } + template void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom); }; diff --git a/src/blitter/32bpp_anim_sse2.cpp b/src/blitter/32bpp_anim_sse2.cpp new file mode 100644 index 000000000..f1bc5b3d9 --- /dev/null +++ b/src/blitter/32bpp_anim_sse2.cpp @@ -0,0 +1,100 @@ +/* $Id$ */ + +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** @file 32bpp_anim.cpp Implementation of a partially SSSE2 32bpp blitter with animation support. */ + +#ifdef WITH_SSE + +#include "../stdafx.h" +#include "../video/video_driver.hpp" +#include "32bpp_anim_sse2.hpp" +#include "32bpp_sse_func.hpp" + +#include "../safeguards.h" + +/** Instantiation of the partially SSSE2 32bpp with animation blitter factory. */ +static FBlitter_32bppSSE2_Anim iFBlitter_32bppSSE2_Anim; + +void Blitter_32bppSSE2_Anim::PaletteAnimate(const Palette &palette) +{ + assert(!_screen_disable_anim); + + this->palette = palette; + /* If first_dirty is 0, it is for 8bpp indication to send the new + * palette. However, only the animation colours might possibly change. + * Especially when going between toyland and non-toyland. */ + assert(this->palette.first_dirty == PALETTE_ANIM_START || this->palette.first_dirty == 0); + + const uint16 *anim = this->anim_buf; + Colour *dst = (Colour *)_screen.dst_ptr; + + bool screen_dirty = false; + + /* Let's walk the anim buffer and try to find the pixels */ + const int width = this->anim_buf_width; + const int screen_pitch = _screen.pitch; + const int anim_pitch = this->anim_buf_pitch; + __m128i anim_cmp = _mm_set1_epi16(PALETTE_ANIM_START - 1); + __m128i brightness_cmp = _mm_set1_epi16(Blitter_32bppBase::DEFAULT_BRIGHTNESS); + __m128i colour_mask = _mm_set1_epi16(0xFF); + for (int y = this->anim_buf_height; y != 0 ; y--) { + Colour *next_dst_ln = dst + screen_pitch; + const uint16 *next_anim_ln = anim + anim_pitch; + int x = width; + while (x > 0) { + __m128i data = _mm_load_si128((const __m128i *) anim); + + /* low bytes only, shifted into high positions */ + __m128i colour_data = _mm_and_si128(data, colour_mask); + + /* test if any colour >= PALETTE_ANIM_START */ + int colour_cmp_result = _mm_movemask_epi8(_mm_cmpgt_epi16(colour_data, anim_cmp)); + if (unlikely(colour_cmp_result)) { + /* test if any brightness is unexpected */ + if (unlikely(x < 8 || colour_cmp_result != 0xFFFF || + _mm_movemask_epi8(_mm_cmpeq_epi16(_mm_srli_epi16(data, 8), brightness_cmp)) != 0xFFFF)) { + /* slow path: < 8 pixels left or unexpected brightnesses */ + for (int z = min(x, 8); z != 0 ; z--) { + int value = _mm_extract_epi16(data, 0); + uint8 colour = GB(value, 0, 8); + if (colour >= PALETTE_ANIM_START) { + /* Update this pixel */ + *dst = AdjustBrightneSSE(LookupColourInPalette(colour), GB(value, 8, 8)); + screen_dirty = true; + } + data = _mm_srli_si128(data, 2); + dst++; + } + } else { + /* medium path: 8 pixels to animate all of expected brightnesses */ + for (int z = 0; z < 8; z++) { + *dst = LookupColourInPalette(_mm_extract_epi16(colour_data, 0)); + colour_data = _mm_srli_si128(colour_data, 2); + dst++; + } + screen_dirty = true; + } + } else { + /* fast path, no animation */ + dst += 8; + } + anim += 8; + x -= 8; + } + dst = next_dst_ln; + anim = next_anim_ln; + } + + if (screen_dirty) { + /* Make sure the backend redraws the whole screen */ + VideoDriver::GetInstance()->MakeDirty(0, 0, _screen.width, _screen.height); + } +} + +#endif /* WITH_SSE */ diff --git a/src/blitter/32bpp_anim_sse2.hpp b/src/blitter/32bpp_anim_sse2.hpp new file mode 100644 index 000000000..0d4a5f1e6 --- /dev/null +++ b/src/blitter/32bpp_anim_sse2.hpp @@ -0,0 +1,43 @@ +/* $Id$ */ + +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** @file 32bpp_anim.hpp A partially SSE2 32 bpp blitter with animation support. */ + +#ifndef BLITTER_32BPP_SSE2_ANIM_HPP +#define BLITTER_32BPP_SSE2_ANIM_HPP + +#ifdef WITH_SSE + +#ifndef SSE_VERSION +#define SSE_VERSION 2 +#endif + +#ifndef FULL_ANIMATION +#define FULL_ANIMATION 1 +#endif + +#include "32bpp_anim.hpp" +#include "32bpp_sse2.hpp" + +/** A partially 32 bpp blitter with palette animation. */ +class Blitter_32bppSSE2_Anim : public Blitter_32bppAnim { +public: + /* virtual */ void PaletteAnimate(const Palette &palette); + /* virtual */ const char *GetName() { return "32bpp-sse2-anim"; } +}; + +/** Factory for the partially 32bpp blitter with animation. */ +class FBlitter_32bppSSE2_Anim : public BlitterFactory { +public: + FBlitter_32bppSSE2_Anim() : BlitterFactory("32bpp-sse2-anim", "32bpp partially SSE2 Animation Blitter (palette animation)", HasCPUIDFlag(1, 3, 26)) {} + /* virtual */ Blitter *CreateInstance() { return new Blitter_32bppSSE2_Anim(); } +}; + +#endif /* WITH_SSE */ +#endif /* BLITTER_32BPP_ANIM_HPP */ diff --git a/src/blitter/32bpp_anim_sse4.cpp b/src/blitter/32bpp_anim_sse4.cpp index 987cb0c6a..219fb466b 100644 --- a/src/blitter/32bpp_anim_sse4.cpp +++ b/src/blitter/32bpp_anim_sse4.cpp @@ -35,8 +35,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL { const byte * const remap = bp->remap; Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left; - assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'bp->dst' into an 'anim_buf' offset below. - uint16 *anim_line = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_pitch + bp->left; + uint16 *anim_line = this->anim_buf + this->ScreenToAnimOffset((uint32 *)bp->dst) + bp->top * this->anim_buf_pitch + bp->left; int effective_width = bp->width; /* Find where to start reading in the source sprite. */ diff --git a/src/blitter/32bpp_anim_sse4.hpp b/src/blitter/32bpp_anim_sse4.hpp index e2d4cfc23..9d9ad5dd9 100644 --- a/src/blitter/32bpp_anim_sse4.hpp +++ b/src/blitter/32bpp_anim_sse4.hpp @@ -23,13 +23,14 @@ #endif #include "32bpp_anim.hpp" +#include "32bpp_anim_sse2.hpp" #include "32bpp_sse4.hpp" #undef MARGIN_NORMAL_THRESHOLD #define MARGIN_NORMAL_THRESHOLD 4 /** The SSE4 32 bpp blitter with palette animation. */ -class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppAnim, public Blitter_32bppSSE_Base { +class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppSSE2_Anim, public Blitter_32bppSSE_Base { private: public: diff --git a/src/blitter/32bpp_base.cpp b/src/blitter/32bpp_base.cpp index 26dd2f037..344d017ef 100644 --- a/src/blitter/32bpp_base.cpp +++ b/src/blitter/32bpp_base.cpp @@ -143,6 +143,36 @@ void Blitter_32bppBase::PaletteAnimate(const Palette &palette) /* By default, 32bpp doesn't have palette animation */ } +Colour Blitter_32bppBase::ReallyAdjustBrightness(Colour colour, uint8 brightness) +{ + assert(DEFAULT_BRIGHTNESS == 1 << 7); + + uint64 combined = (((uint64) colour.r) << 32) | (colour.g << 16) | colour.b; + combined *= brightness; + + uint16 r = GB(combined, 39, 8); + uint16 g = GB(combined, 23, 8); + uint16 b = GB(combined, 7, 8); + + if ((combined & 0x800080008000L) == 0L) { + return Colour(r, g, b, colour.a); + } + + uint16 ob = 0; + /* Sum overbright */ + if (r > 255) ob += r - 255; + if (g > 255) ob += g - 255; + if (b > 255) ob += b - 255; + + /* Reduce overbright strength */ + ob /= 2; + return Colour( + r >= 255 ? 255 : min(r + ob * (255 - r) / 256, 255), + g >= 255 ? 255 : min(g + ob * (255 - g) / 256, 255), + b >= 255 ? 255 : min(b + ob * (255 - b) / 256, 255), + colour.a); +} + Blitter::PaletteAnimation Blitter_32bppBase::UsePaletteAnimation() { return Blitter::PALETTE_ANIMATION_NONE; diff --git a/src/blitter/32bpp_base.hpp b/src/blitter/32bpp_base.hpp index 26c3dee3f..6b8f87cc6 100644 --- a/src/blitter/32bpp_base.hpp +++ b/src/blitter/32bpp_base.hpp @@ -146,30 +146,14 @@ public: static const int DEFAULT_BRIGHTNESS = 128; + static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness); + static inline Colour AdjustBrightness(Colour colour, uint8 brightness) { /* Shortcut for normal brightness */ - if (brightness == DEFAULT_BRIGHTNESS) return colour; - - uint16 ob = 0; - uint16 r = colour.r * brightness / DEFAULT_BRIGHTNESS; - uint16 g = colour.g * brightness / DEFAULT_BRIGHTNESS; - uint16 b = colour.b * brightness / DEFAULT_BRIGHTNESS; - - /* Sum overbright */ - if (r > 255) ob += r - 255; - if (g > 255) ob += g - 255; - if (b > 255) ob += b - 255; + if (likely(brightness == DEFAULT_BRIGHTNESS)) return colour; - if (ob == 0) return Colour(r, g, b, colour.a); - - /* Reduce overbright strength */ - ob /= 2; - return Colour( - r >= 255 ? 255 : min(r + ob * (255 - r) / 256, 255), - g >= 255 ? 255 : min(g + ob * (255 - g) / 256, 255), - b >= 255 ? 255 : min(b + ob * (255 - b) / 256, 255), - colour.a); + return ReallyAdjustBrightness(colour, brightness); } }; diff --git a/src/blitter/32bpp_sse_func.hpp b/src/blitter/32bpp_sse_func.hpp index fb0ce9eb6..e5c23ccd2 100644 --- a/src/blitter/32bpp_sse_func.hpp +++ b/src/blitter/32bpp_sse_func.hpp @@ -138,7 +138,7 @@ IGNORE_UNINITIALIZED_WARNING_STOP static inline Colour AdjustBrightneSSE(Colour colour, uint8 brightness) { /* Shortcut for normal brightness. */ - if (brightness == Blitter_32bppBase::DEFAULT_BRIGHTNESS) return colour; + if (likely(brightness == Blitter_32bppBase::DEFAULT_BRIGHTNESS)) return colour; return ReallyAdjustBrightness(colour, brightness); } diff --git a/src/gfxinit.cpp b/src/gfxinit.cpp index 06534ad29..76f59552f 100644 --- a/src/gfxinit.cpp +++ b/src/gfxinit.cpp @@ -284,6 +284,9 @@ static bool SwitchNewGRFBlitter() #endif { "8bpp-optimized", 2, 8, 8, 8, 8 }, { "32bpp-optimized", 0, 8, 32, 8, 32 }, +#ifdef WITH_SSE + { "32bpp-sse2-anim", 1, 8, 32, 8, 32 }, +#endif { "32bpp-anim", 1, 8, 32, 8, 32 }, }; diff --git a/src/stdafx.h b/src/stdafx.h index 43e52ff27..572f1a809 100644 --- a/src/stdafx.h +++ b/src/stdafx.h @@ -535,4 +535,12 @@ static inline void free(const void *ptr) #define IGNORE_UNINITIALIZED_WARNING_STOP #endif +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + #endif /* STDAFX_H */