Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix typo in src/GUI/PageInput.h #1049

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ project(simplescreenrecorder VERSION 0.4.4)

# Detect the target processor family from CMAKE_SYSTEM_PROCESSOR.
# Both flags start out FALSE and at most one of them is switched to TRUE, so
# each variable is always defined and expands to an explicit boolean when it is
# used as an option() default, whichever branch matched.
set(PROCESSOR_IS_X86 FALSE)
set(PROCESSOR_IS_LOONGARCH FALSE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
	set(PROCESSOR_IS_X86 TRUE)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch")
	set(PROCESSOR_IS_LOONGARCH TRUE)
endif()

# Feature switches (ENABLE_*). The two assembly/intrinsics switches take their
# default from the processor family detected earlier in this file.
option(
	ENABLE_32BIT_GLINJECT
	"Build the 32-bit version of 'libssr-glinject' on 64-bit systems (in addition to the 64-bit version). Required for OpenGL recording of 32-bit applications on 64-bit systems."
	FALSE
)
option(
	ENABLE_X86_ASM
	"Allow x86/x64 assembly or intrinsics."
	${PROCESSOR_IS_X86}
)
option(
	ENABLE_LOONGARCH_ASM
	"Allow loongarch assembly or intrinsics."
	${PROCESSOR_IS_LOONGARCH}
)
option(
	ENABLE_FFMPEG_VERSIONS
	"Use FFmpeg version numbers for feature support tests. Enable when using FFmpeg, disable when using Libav."
	TRUE
)
option(
	ENABLE_JACK_METADATA
	"Use the JACK metadata API. May not work with very old JACK versions."
	TRUE
)

# Component switches (WITH_*).
option(
	WITH_OPENGL_RECORDING
	"Build with OpenGL recording support."
	TRUE
)
Expand All @@ -28,6 +32,7 @@ option(WITH_JACK "Build with JACK support." TRUE)
# Toolkit selection and build targets.
option(
	WITH_QT5
	"Build with Qt5 (instead of Qt4)."
	FALSE
)
option(
	WITH_SIMPLESCREENRECORDER
	"Build the 'simplescreenrecorder' executable."
	TRUE
)
option(
	WITH_GLINJECT
	"Build the 'libssr-glinject' library. Required for OpenGL recording."
	TRUE
)

# SIMDe support defaults to the LoongArch detection result.
option(
	WITH_SIMDE
	"Build with simde support."
	${PROCESSOR_IS_LOONGARCH}
)

# Make the project's own find modules (the cmake/ directory) visible to find_package().
# The path is appended instead of assigned so that a module path supplied by the
# user or a toolchain file is preserved, and it is anchored at PROJECT_SOURCE_DIR
# so it still resolves correctly when this project is built as a subproject.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

Expand Down
13 changes: 13 additions & 0 deletions cmake/FindSIMDE.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Find module for the SIMDE headers.
#
# Result variables:
#   SIMDE_FOUND        - TRUE when the headers were located
#   SIMDE_INCLUDE_DIRS - include directories needed to use SIMDE (only set when found)
#
# Cache variables:
#   SIMDE_INCLUDE_DIR  - directory that contains the simde/x86 headers

# pkg-config only provides search hints here, so it is optional: without it
# find_path() still searches the default locations.
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
	pkg_check_modules(PC_SIMDE simde)
endif()

# Either header name is accepted; NAMES makes the multi-name form explicit.
find_path(SIMDE_INCLUDE_DIR
	NAMES simde/x86/sse2.h simde/x86/ssse3.h
	HINTS ${PC_SIMDE_INCLUDEDIR} ${PC_SIMDE_INCLUDE_DIRS}
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(SIMDE DEFAULT_MSG SIMDE_INCLUDE_DIR)

mark_as_advanced(SIMDE_INCLUDE_DIR)

# Do not publish a "-NOTFOUND" value as an include directory.
if(SIMDE_FOUND)
	set(SIMDE_INCLUDE_DIRS "${SIMDE_INCLUDE_DIR}")
endif()
11 changes: 10 additions & 1 deletion src/AV/FastResampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,22 @@ FastResampler::FastResampler(unsigned int channels, float gain) {
default: m_firfilter2_ptr = &FastResampler_FirFilter2_Cn_SSE2; break;
}
} else {
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
switch(m_channels) {
case 1: m_firfilter2_ptr = &FastResampler_FirFilter2_C1_LSX; break;
case 2: m_firfilter2_ptr = &FastResampler_FirFilter2_C2_LSX; break;
default: m_firfilter2_ptr = &FastResampler_FirFilter2_Cn_LSX; break;
}
} else {
#endif
switch(m_channels) {
case 1: m_firfilter2_ptr = &FastResampler_FirFilter2_C1_Fallback; break;
case 2: m_firfilter2_ptr = &FastResampler_FirFilter2_C2_Fallback; break;
default: m_firfilter2_ptr = &FastResampler_FirFilter2_Cn_Fallback; break;
}
#if SSR_USE_X86_ASM
#if SSR_USE_X86_ASM || SSR_USE_LOONGARCH_ASM
}
#endif

Expand Down
6 changes: 6 additions & 0 deletions src/AV/FastResampler_FirFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ void FastResampler_FirFilter2_C1_SSE2(unsigned int channels, unsigned int filter
void FastResampler_FirFilter2_C2_SSE2(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output);
void FastResampler_FirFilter2_Cn_SSE2(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output);
#endif

// LoongArch LSX variants of the FIR filter kernel. They are only declared when
// SSR_USE_LOONGARCH_ASM is enabled, and FastResampler selects them when
// CPUFeatures::HasLSX() returns true.
// All three share one signature; FastResampler picks C1 for one channel, C2 for
// two channels and Cn for any other channel count.
#if SSR_USE_LOONGARCH_ASM
void FastResampler_FirFilter2_C1_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output);
void FastResampler_FirFilter2_C2_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output);
void FastResampler_FirFilter2_Cn_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output);
#endif
90 changes: 90 additions & 0 deletions src/AV/FastResampler_FirFilter_LSX.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
Copyright (c) 2012-2024 Maarten Baert <[email protected]>

This file is part of SimpleScreenRecorder.

SimpleScreenRecorder is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

SimpleScreenRecorder is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SimpleScreenRecorder. If not, see <http://www.gnu.org/licenses/>.
*/

#include "FastResampler_FirFilter.h"

#if SSR_USE_LOONGARCH_ASM

#ifndef SIMDE_ENABLE_NATIVE_ALIASES
#define SIMDE_ENABLE_NATIVE_ALIASES
#endif
//using simde to translate SSE2 to LSX(loongarch 128-bit simd)
#include<simde/x86/sse2.h>

// Single-channel FIR kernel; LSX counterpart of FastResampler_FirFilter2_C1_SSE2,
// written with SSE2 intrinsics.
// Every tap is interpolated as coef1[i] + (coef2[i] - coef1[i]) * frac and multiplied
// with input[i]; the sum of all products is stored in output[0].
// Taps are processed in groups of four (filter_length / 4 iterations). coef1 and coef2
// are read with _mm_load_ps (aligned load), input with _mm_loadu_ps (unaligned load).
void FastResampler_FirFilter2_C1_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output) {
	Q_UNUSED(channels);
	const unsigned int groups = filter_length / 4;
	const __m128 frac4 = _mm_set1_ps(frac);
	__m128 acc = _mm_setzero_ps();
	for(unsigned int g = 0; g < groups; ++g) {
		const unsigned int offset = g * 4;
		const __m128 taps_lo = _mm_load_ps(coef1 + offset);
		const __m128 taps_hi = _mm_load_ps(coef2 + offset);
		const __m128 delta = _mm_sub_ps(taps_hi, taps_lo);
		const __m128 taps = _mm_add_ps(taps_lo, _mm_mul_ps(delta, frac4));
		const __m128 samples = _mm_loadu_ps(input + offset);
		acc = _mm_add_ps(acc, _mm_mul_ps(samples, taps));
	}
	// Horizontal add: fold lanes 2/3 onto lanes 0/1, then lane 1 onto lane 0.
	const __m128 pairs = _mm_add_ps(acc, _mm_shuffle_ps(acc, acc, 0x0e));
	const __m128 total = _mm_add_ss(pairs, _mm_shuffle_ps(pairs, pairs, 0x01));
	_mm_store_ss(output, total);
}

// Two-channel FIR kernel; LSX counterpart of FastResampler_FirFilter2_C2_SSE2,
// written with SSE2 intrinsics.
// Every tap is interpolated as coef1[i] + (coef2[i] - coef1[i]) * frac. Each group of
// four taps consumes eight input floats; every tap is duplicated so that it multiplies
// two consecutive input values. The two accumulated sums are written to output[0] and output[1].
// coef1 and coef2 are read with _mm_load_ps (aligned load), input with _mm_loadu_ps.
void FastResampler_FirFilter2_C2_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output) {
	Q_UNUSED(channels);
	const unsigned int groups = filter_length / 4;
	const __m128 frac4 = _mm_set1_ps(frac);
	__m128 acc = _mm_setzero_ps();
	for(unsigned int g = 0; g < groups; ++g) {
		const __m128 taps_lo = _mm_load_ps(coef1 + g * 4);
		const __m128 taps_hi = _mm_load_ps(coef2 + g * 4);
		const __m128 delta = _mm_sub_ps(taps_hi, taps_lo);
		const __m128 taps = _mm_add_ps(taps_lo, _mm_mul_ps(delta, frac4));
		const float *frames = input + g * 8;
		const __m128 first_half = _mm_loadu_ps(frames);
		const __m128 second_half = _mm_loadu_ps(frames + 4);
		// unpacklo/unpackhi turn (t0 t1 t2 t3) into (t0 t0 t1 t1) and (t2 t2 t3 t3).
		acc = _mm_add_ps(acc, _mm_mul_ps(first_half, _mm_unpacklo_ps(taps, taps)));
		acc = _mm_add_ps(acc, _mm_mul_ps(second_half, _mm_unpackhi_ps(taps, taps)));
	}
	// Fold the upper lane pair onto the lower one and store the two low floats at once.
	const __m128 folded = _mm_add_ps(acc, _mm_shuffle_ps(acc, acc, 0xee));
	_mm_store_sd((double*) output, _mm_castps_pd(folded));
}

// Generic FIR kernel for any channel count; LSX counterpart of
// FastResampler_FirFilter2_Cn_SSE2, written with SSE2 intrinsics.
// For every channel c, each tap is interpolated as coef1[i] + (coef2[i] - coef1[i]) * frac
// and multiplied with input[i * channels + c]; the sum is written to output[c].
// Taps are processed in groups of four (filter_length / 4 iterations). coef1 and coef2
// are read with _mm_load_ps (aligned load).
void FastResampler_FirFilter2_Cn_LSX(unsigned int channels, unsigned int filter_length, float* coef1, float* coef2, float frac, float* input, float* output) {
	const unsigned int groups = filter_length / 4;
	const __m128 v_frac = _mm_set1_ps(frac);
	for(unsigned int c = 0; c < channels; ++c) {
		__m128 sum = _mm_setzero_ps();
		// Every channel uses the same coefficients, so restart at the first tap for each
		// channel instead of continuing past the end of the coefficient arrays.
		const float *c1 = coef1, *c2 = coef2;
		const float *input2 = input + c;
		for(unsigned int i = 0; i < groups; ++i) {
			__m128 v_coef1 = _mm_load_ps(c1), v_coef2 = _mm_load_ps(c2);
			c1 += 4; c2 += 4;
			__m128 filter_value = _mm_add_ps(v_coef1, _mm_mul_ps(_mm_sub_ps(v_coef2, v_coef1), v_frac));
			// Gather four samples of this channel (stride = channels) into one vector.
			__m128 v_input1 = _mm_load_ss(input2); input2 += channels;
			__m128 v_input2 = _mm_load_ss(input2); input2 += channels;
			__m128 v_input3 = _mm_load_ss(input2); input2 += channels;
			__m128 v_input4 = _mm_load_ss(input2); input2 += channels;
			__m128 v_input = _mm_movelh_ps(_mm_unpacklo_ps(v_input1, v_input2), _mm_unpacklo_ps(v_input3, v_input4));
			sum = _mm_add_ps(sum, _mm_mul_ps(v_input, filter_value));
		}
		// Horizontal add: fold lanes 2/3 onto lanes 0/1, then lane 1 onto lane 0.
		__m128 sum2 = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, 0x0e));
		__m128 sum3 = _mm_add_ss(sum2, _mm_shuffle_ps(sum2, sum2, 0x01));
		_mm_store_ss(output + c, sum3);
	}
}

#endif
48 changes: 48 additions & 0 deletions src/AV/FastScaler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,14 @@ void FastScaler::Convert_BGRA_YUV444(unsigned int width, unsigned int height, co
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Convert_BGRA_YUV444_LSX(width, height, in_data, in_stride, out_data, out_stride);
} else {
Convert_BGRA_YUV444_Fallback(width, height, in_data, in_stride, out_data, out_stride);
}
return;
#endif

Convert_BGRA_YUV444_Fallback(width, height, in_data, in_stride, out_data, out_stride);

Expand All @@ -190,6 +198,14 @@ void FastScaler::Convert_BGRA_YUV422(unsigned int width, unsigned int height, co
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Convert_BGRA_YUV422_LSX(width, height, in_data, in_stride, out_data, out_stride);
} else {
Convert_BGRA_YUV422_Fallback(width, height, in_data, in_stride, out_data, out_stride);
}
return;
#endif

Convert_BGRA_YUV422_Fallback(width, height, in_data, in_stride, out_data, out_stride);

Expand All @@ -215,6 +231,14 @@ void FastScaler::Convert_BGRA_YUV420(unsigned int width, unsigned int height, co
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Convert_BGRA_YUV420_LSX(width, height, in_data, in_stride, out_data, out_stride);
} else {
Convert_BGRA_YUV420_Fallback(width, height, in_data, in_stride, out_data, out_stride);
}
return;
#endif

Convert_BGRA_YUV420_Fallback(width, height, in_data, in_stride, out_data, out_stride);

Expand All @@ -239,6 +263,14 @@ void FastScaler::Convert_BGRA_NV12(unsigned int width, unsigned int height, cons
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Convert_BGRA_NV12_LSX(width, height, in_data, in_stride, out_data, out_stride);
} else {
Convert_BGRA_NV12_Fallback(width, height, in_data, in_stride, out_data, out_stride);
}
return;
#endif

Convert_BGRA_NV12_Fallback(width, height, in_data, in_stride, out_data, out_stride);

Expand All @@ -261,6 +293,14 @@ void FastScaler::Convert_BGRA_BGR(unsigned int width, unsigned int height, const
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Convert_BGRA_BGR_LSX(width, height, in_data, in_stride, out_data, out_stride);
} else {
Convert_BGRA_BGR_Fallback(width, height, in_data, in_stride, out_data, out_stride);
}
return;
#endif

Convert_BGRA_BGR_Fallback(width, height, in_data, in_stride, out_data, out_stride);

Expand All @@ -284,6 +324,14 @@ void FastScaler::Scale_BGRA(unsigned int in_width, unsigned int in_height, const
return;
}
#endif
#if SSR_USE_LOONGARCH_ASM
if(CPUFeatures::HasLSX()) {
Scale_BGRA_LSX(in_width, in_height, in_data, in_stride, out_width, out_height, out_data, out_stride);
} else {
Scale_BGRA_Fallback(in_width, in_height, in_data, in_stride, out_width, out_height, out_data, out_stride);
}
return;
#endif

Scale_BGRA_Fallback(in_width, in_height, in_data, in_stride, out_width, out_height, out_data, out_stride);

Expand Down
8 changes: 8 additions & 0 deletions src/AV/FastScaler_Convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,11 @@ void Convert_BGRA_YUV420_SSSE3(unsigned int w, unsigned int h, const uint8_t* in
void Convert_BGRA_NV12_SSSE3(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* const out_data[2], const int out_stride[2]);
void Convert_BGRA_BGR_SSSE3(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* out_data, int out_stride);
#endif

// LoongArch LSX variants of the BGRA converters. They are only declared when
// SSR_USE_LOONGARCH_ASM is enabled, and FastScaler calls them when
// CPUFeatures::HasLSX() returns true (otherwise it calls the *_Fallback versions).
// w and h are the image width and height; in_data/in_stride describe the BGRA input.
// The YUV444/YUV422/YUV420 converters take three output pointers and strides,
// NV12 takes two, and BGR takes a single output buffer and stride.
// NOTE(review): strides are presumably in bytes - confirm against the implementations.
#if SSR_USE_LOONGARCH_ASM
void Convert_BGRA_YUV444_LSX(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* const out_data[3], const int out_stride[3]);
void Convert_BGRA_YUV422_LSX(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* const out_data[3], const int out_stride[3]);
void Convert_BGRA_YUV420_LSX(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* const out_data[3], const int out_stride[3]);
void Convert_BGRA_NV12_LSX(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* const out_data[2], const int out_stride[2]);
void Convert_BGRA_BGR_LSX(unsigned int w, unsigned int h, const uint8_t* in_data, int in_stride, uint8_t* out_data, int out_stride);
#endif
Loading