Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements for 4:1 eSBR configuration for USAC encoder #94

Merged
merged 1 commit into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions encoder/iusace_cnst.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,4 @@
/* Pre-roll and first-frame defaults.
 * NOTE(review): the unit of MAX_PREROLL_CONFIG_SIZE (bytes vs. bits) is not
 * visible here - confirm against its users. */
#define MAX_PREROLL_CONFIG_SIZE (1024)
#define CC_NUM_PREROLL_FRAMES (1)
#define USAC_FIRST_FRAME_FLAG_DEFAULT_VALUE (1)
/* Default for delay adjustment; presumably the initial value of the
 * use_delay_adjustment configuration flag - confirm against the API code. */
#define USAC_DEFAULT_DELAY_ADJUSTMENT_VALUE (1)
1 change: 1 addition & 0 deletions encoder/iusace_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ typedef struct {
ia_drc_internal_config str_internal_drc_cfg;
WORD32 use_measured_loudness;
UWORD16 stream_id;
FLAG use_delay_adjustment;
} ia_usac_encoder_config_struct;

typedef struct {
Expand Down
241 changes: 108 additions & 133 deletions encoder/ixheaace_api.c

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion encoder/ixheaace_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ typedef struct {
UWORD32 measurement_system;
FLOAT32 sample_peak_level;
UWORD16 stream_id;
FLAG use_delay_adjustment;
} ixheaace_input_config;

typedef struct {
Expand All @@ -140,7 +141,7 @@ typedef struct {
WORD32 header_samp_freq;
WORD32 audio_profile;
FLOAT32 down_sampling_ratio;
pWORD32 pb_inp_buf_32;
WORD32 expected_frame_count;
} ixheaace_output_config;

typedef struct {
Expand Down
17 changes: 13 additions & 4 deletions encoder/ixheaace_loudness_measurement.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,12 @@ FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle) {
pstr_loudness_hdl->no_of_mf_passing_rel_gate = 0;
pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate = 0;

avg = (pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate /
pstr_loudness_hdl->no_of_mf_passing_abs_gate);
if (pstr_loudness_hdl->no_of_mf_passing_abs_gate) {
avg = (pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate /
pstr_loudness_hdl->no_of_mf_passing_abs_gate);
} else {
avg = IXHEAACE_SUM_SQUARE_EPS / pstr_loudness_hdl->num_samples_per_ch;
}
pstr_loudness_hdl->rel_gate = -0.691 + 10 * log10(avg) - 10;

while (count < pstr_loudness_hdl->ml_count_fn_call) {
Expand All @@ -368,8 +372,13 @@ FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle) {
count++;
}

loudness = -0.691 + 10 * log10((pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate /
(FLOAT64)pstr_loudness_hdl->no_of_mf_passing_rel_gate));
if (pstr_loudness_hdl->no_of_mf_passing_rel_gate) {
loudness = -0.691 + 10 * log10((pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate /
(FLOAT64)pstr_loudness_hdl->no_of_mf_passing_rel_gate));
} else {
loudness =
-0.691 + 10 * log10(IXHEAACE_SUM_SQUARE_EPS / pstr_loudness_hdl->num_samples_per_ch);
}

return loudness;
}
Expand Down
1 change: 1 addition & 0 deletions encoder/ixheaace_loudness_measurement.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#define IXHEAACE_DEFAULT_SHORT_TERM_LOUDENSS (-1000)
#define IXHEAACE_DEFAULT_MOMENTARY_LOUDENSS (-1000)
#define IXHEAACE_SEC_TO_100MS_FACTOR (60 * 10)
/* Small positive epsilon equal to (1/32768)^2 == 2^-30, evaluated in single
 * precision. The integrated-loudness code substitutes it for the mean-square
 * sum when no block passes a gate, so log10() never sees zero or 0/0. */
#define IXHEAACE_SUM_SQUARE_EPS (1.0f / (32768.0f * 32768.0f))

typedef struct {
BOOL passes_abs_gate;
Expand Down
5 changes: 5 additions & 0 deletions encoder/ixheaace_rom.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ input buffer (1ch)
/* For 1:3 resampler -> max phase delay * resamp_fac */
#define MAXIMUM_DS_1_3_FILTER_DELAY (36)

/* Signed delay-adjustment constants: one named CC (presumably the core coder)
 * and one per SBR ratio (2:1, 4:1, 8:3) as the names indicate.
 * NOTE(review): the unit (presumably samples) and the place where these
 * values are applied are not visible in this header - confirm against the
 * users of these macros. */
#define CC_DELAY_ADJUSTMENT (448)
#define SBR_2_1_DELAY_ADJUSTMENT (-70)
#define SBR_4_1_DELAY_ADJUSTMENT (218)
#define SBR_8_3_DELAY_ADJUSTMENT (-74)

extern const FLOAT32 ixheaace_fd_quant_table[257];
extern const FLOAT32 ixheaace_fd_inv_quant_table[257];
extern const FLOAT32 ixheaace_pow_4_3_table[64];
Expand Down
69 changes: 45 additions & 24 deletions encoder/ixheaace_sbr_env_est.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

i = 0;
while (i < n_envelopes) {
start_pos = time_step * pstr_const_frame_info->borders[i];
stop_pos = time_step * pstr_const_frame_info->borders[i + 1];
start_pos = pstr_const_frame_info->borders[i];
stop_pos = pstr_const_frame_info->borders[i + 1];
freq_res = pstr_const_frame_info->freq_res[i];
num_bands = pstr_sbr_cfg->num_scf[freq_res];

Expand All @@ -216,7 +216,7 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
stop_pos = stop_pos - temp;
}
} else {
stop_pos = stop_pos - time_step;
stop_pos = stop_pos - 1;
}
}
for (j = 0; j < num_bands; j++) {
Expand Down Expand Up @@ -265,14 +265,22 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
if (missing_harmonic) {
count = stop_pos - start_pos;
for (l = start_pos; l < stop_pos; l++) {
energy_left += ptr_y_buf_left[l / 2][li];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_left += ptr_y_buf_left[l >> 1][li];
} else {
energy_left += ptr_y_buf_left[l][li];
}
}

k = li + 1;
while (k < ui) {
tmp_ene_l = 0.0f;
for (l = start_pos; l < stop_pos; l++) {
tmp_ene_l += ptr_y_buf_left[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
tmp_ene_l += ptr_y_buf_left[l >> 1][k];
} else {
tmp_ene_l += ptr_y_buf_left[l][k];
}
}

if (tmp_ene_l > energy_left) {
Expand All @@ -291,14 +299,22 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

if (stereo_mode == SBR_COUPLING) {
for (l = start_pos; l < stop_pos; l++) {
energy_right += ptr_y_buf_right[l / 2][li];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_right += ptr_y_buf_right[l >> 1][li];
} else {
energy_right += ptr_y_buf_right[l][li];
}
}

k = li + 1;
while (k < ui) {
tmp_ene_r = 0.0f;
for (l = start_pos; l < stop_pos; l++) {
tmp_ene_r += ptr_y_buf_right[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
tmp_ene_r += ptr_y_buf_right[l >> 1][k];
} else {
tmp_ene_r += ptr_y_buf_right[l][k];
}
}

if (tmp_ene_r > energy_right) {
Expand All @@ -314,22 +330,17 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
energy_right = energy_right * 0.5f;
}
}

tmp_ene_l = energy_left;
energy_left = (energy_left + energy_right) * 0.5f;
energy_right = (tmp_ene_l + 1) / (energy_right + 1);
energy_right = ((tmp_ene_l * time_step) + 1) / ((energy_right * time_step) + 1);
}
} else {
count = (stop_pos - start_pos) * (ui - li);

k = li;
while (k < ui) {
for (l = start_pos; l < stop_pos; l++) {
if (pstr_sbr_cfg->is_ld_sbr) {
energy_left += ptr_y_buf_left[l][k];
} else {
energy_left += ptr_y_buf_left[l / 2][k];
}
energy_left += ptr_y_buf_left[l][k];
}
k++;
}
Expand All @@ -338,17 +349,21 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(
k = li;
while (k < ui) {
for (l = start_pos; l < stop_pos; l++) {
energy_right += ptr_y_buf_right[l / 2][k];
if (pstr_sbr_cfg->is_ld_sbr) {
energy_right += ptr_y_buf_right[l >> 1][k];
} else {
energy_right += ptr_y_buf_right[l][k];
}
}
k++;
}
tmp_ene_l = energy_left;
energy_left = (energy_left + energy_right) * 0.5f;
energy_right = (tmp_ene_l + 1) / (energy_right + 1);
energy_right = ((tmp_ene_l * time_step) + 1) / ((energy_right * time_step) + 1);
}
}

energy_left = (FLOAT32)(log(energy_left / (count * 64) + EPS) * SBR_INV_LOG_2);
energy_left = (FLOAT32)(log((energy_left / (count * 64)) + EPS) * SBR_INV_LOG_2);

if (energy_left < 0.0f) {
energy_left = 0.0f;
Expand All @@ -374,8 +389,9 @@ static IA_ERRORCODE ixheaace_calculate_sbr_envelope(

for (j = 0; j < num_bands; j++) {
if (freq_res == FREQ_RES_HIGH && pstr_sbr->str_sbr_extract_env.envelope_compensation[j]) {
ptr_sfb_ene_l[m] -= (WORD32)(
ca * ixheaac_abs32(pstr_sbr->str_sbr_extract_env.envelope_compensation[j]));
ptr_sfb_ene_l[m] -=
(WORD32)(ca *
ixheaac_abs32(pstr_sbr->str_sbr_extract_env.envelope_compensation[j]));
}

if (ptr_sfb_ene_l[m] < 0) {
Expand Down Expand Up @@ -2124,17 +2140,22 @@ IA_ERRORCODE ixheaace_extract_sbr_envelope(FLOAT32 *ptr_in_time, FLOAT32 *ptr_co
pstr_sbr_extract_env->ptr_y_buffer + pstr_sbr_extract_env->y_buffer_write_offset,
pstr_sbr_extract_env->ptr_r_buffer, pstr_sbr_extract_env->ptr_i_buffer,
pstr_sbr_cfg->is_ld_sbr, pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, samp_ratio_fac,
pstr_hbe_enc, (IXHEAACE_OP_DELAY_OFFSET + IXHEAACE_ESBR_HBE_DELAY_OFFSET +
IXHEAACE_SBR_HF_ADJ_OFFSET), pstr_sbr_hdr->sbr_harmonic);
pstr_hbe_enc,
(IXHEAACE_OP_DELAY_OFFSET + IXHEAACE_ESBR_HBE_DELAY_OFFSET + IXHEAACE_SBR_HF_ADJ_OFFSET),
pstr_sbr_hdr->sbr_harmonic);

ixheaace_calculate_tonality_quotas(
&pstr_env_ch[ch]->str_ton_corr, pstr_sbr_extract_env->ptr_r_buffer,
pstr_sbr_extract_env->ptr_i_buffer,
pstr_sbr_cfg->ptr_freq_band_tab[HI][pstr_sbr_cfg->num_scf[HI]],
pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, pstr_sbr_cfg->is_ld_sbr);
pstr_env_ch[ch]->str_sbr_qmf.num_time_slots, pstr_sbr_extract_env->time_step);
if (pstr_sbr_cfg->is_ld_sbr) {
ixheaace_detect_transient_eld(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch]);
} else if (pstr_sbr_extract_env->time_step == 4) {
ixheaace_detect_transient_4_1(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch],
pstr_sbr_extract_env->time_step, pstr_sbr_cfg->sbr_codec);
} else {
ixheaace_detect_transient(pstr_sbr_extract_env->ptr_y_buffer,
&pstr_env_ch[ch]->str_sbr_trans_detector, transient_info[ch],
Expand Down Expand Up @@ -2913,8 +2934,8 @@ IA_ERRORCODE ixheaace_extract_sbr_envelope(FLOAT32 *ptr_in_time, FLOAT32 *ptr_co
FLOAT32 *ptr_tmp;
ptr_tmp = pstr_sbr_extract_env->ptr_y_buffer[i];
pstr_sbr_extract_env->ptr_y_buffer[i] =
pstr_sbr_extract_env->ptr_y_buffer[i + (pstr_sbr_extract_env->no_cols >> 1)];
pstr_sbr_extract_env->ptr_y_buffer[i + (pstr_sbr_extract_env->no_cols >> 1)] = ptr_tmp;
pstr_sbr_extract_env->ptr_y_buffer[i + pstr_sbr_extract_env->time_slots];
pstr_sbr_extract_env->ptr_y_buffer[i + pstr_sbr_extract_env->time_slots] = ptr_tmp;
}

pstr_sbr_extract_env->buffer_flag ^= 1;
Expand Down
7 changes: 2 additions & 5 deletions encoder/ixheaace_sbr_env_est_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,9 @@ ixheaace_create_extract_sbr_envelope(WORD32 ch,
if ((sbr_codec == USAC_SBR) && (USAC_SBR_RATIO_INDEX_4_1 == sbr_ratio_idx)) {
qmf_time_slots = QMF_TIME_SLOTS_USAC_4_1;
y_buffer_write_offset = QMF_TIME_SLOTS_USAC_4_1;
no_cols = qmf_time_slots;
}
if (is_ld_sbr && frame_flag_480) {
y_buffer_write_offset = 30;
no_cols = 30;
time_slots = 15;
}

pstr_sbr_ext_env->y_buffer_write_offset = y_buffer_write_offset;

y_buffer_length = pstr_sbr_ext_env->y_buffer_write_offset + y_buffer_write_offset;
Expand Down
7 changes: 6 additions & 1 deletion encoder/ixheaace_sbr_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ UWORD32 ixheaace_sbr_limit_bitrate(UWORD32 bit_rate, UWORD32 num_ch, UWORD32 cor
VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD32 bit_rate,
UWORD32 num_ch, UWORD32 fs_core, UWORD32 trans_fac,
UWORD32 std_br, ixheaace_str_qmf_tabs *pstr_qmf_tab,
WORD32 aot) {
WORD32 aot, WORD32 is_esbr_4_1) {
FLAG table_found = IXHEAACE_TABLE_IDX_NOT_FOUND;
WORD32 idx_sr = 0;
WORD32 idx_ch = 0;
Expand Down Expand Up @@ -538,6 +538,11 @@ VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD
pstr_config->ps_mode = ixheaace_get_ps_mode(bit_rate);
}
}

if (is_esbr_4_1) {
pstr_config->start_freq = 10;
pstr_config->stop_freq = 11;
}
}

VOID ixheaace_initialize_sbr_defaults(ixheaace_pstr_sbr_cfg pstr_config) {
Expand Down
2 changes: 1 addition & 1 deletion encoder/ixheaace_sbr_main.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ UWORD32 ixheaace_sbr_limit_bitrate(UWORD32 bit_rate, UWORD32 num_channels,
VOID ixheaace_adjust_sbr_settings(const ixheaace_pstr_sbr_cfg pstr_config, UWORD32 bit_rate,
UWORD32 num_channels, UWORD32 fs_core, UWORD32 trans_fac,
UWORD32 standard_bitrate, ixheaace_str_qmf_tabs *ptr_qmf_tab,
WORD32 aot);
WORD32 aot, WORD32 is_esbr_4_1);

VOID ixheaace_initialize_sbr_defaults(ixheaace_pstr_sbr_cfg pstr_config);

Expand Down
24 changes: 8 additions & 16 deletions encoder/ixheaace_sbr_qmf_enc.c
Original file line number Diff line number Diff line change
Expand Up @@ -961,17 +961,14 @@ VOID ixheaace_get_energy_from_cplx_qmf(
}
if (0 == is_ld_sbr) {
FLOAT32 *ptr_energy_val = &ptr_energy_vals[0][0];
FLOAT32 *ptr_real = &ptr_real_values[0][0];
FLOAT32 *ptr_imag = &ptr_imag_values[0][0];
FLOAT32 *ptr_hbe_real = NULL;
FLOAT32 *ptr_hbe_imag = NULL;
if (harmonic_sbr == 1) {
ptr_hbe_real = &pstr_hbe_enc->qmf_buf_real[op_delay][0];
ptr_hbe_imag = &pstr_hbe_enc->qmf_buf_imag[op_delay][0];
}
k = (num_time_slots - 1);
while (k >= 0) {
for (j = 63; j >= 0; j--) {
for (k = 0; k < num_time_slots; k++) {
for (j = 0; j < IXHEAACE_QMF_CHANNELS; j++) {
FLOAT32 tmp = 0.0f;
if (harmonic_sbr == 1) {
FLOAT32 real_hbe, imag_hbe;
Expand All @@ -981,28 +978,23 @@ VOID ixheaace_get_energy_from_cplx_qmf(
*ptr_energy_val = tmp;
ptr_hbe_real++;
ptr_hbe_imag++;
ptr_energy_val++;
} else {
FLOAT32 real, imag;
WORD32 i;
WORD32 i, subband;
subband = samp_ratio_fac * k;
for (i = 0; i < samp_ratio_fac; i++) {
real = *(ptr_real + i * IXHEAACE_QMF_CHANNELS);
imag = *(ptr_imag + i * IXHEAACE_QMF_CHANNELS);
real = ptr_real_values[subband + i][j];
imag = ptr_imag_values[subband + i][j];
tmp += (real * real) + (imag * imag);
}
*ptr_energy_val = tmp * avg_fac;
ptr_real++;
ptr_imag++;
ptr_energy_vals[k][j] = tmp * avg_fac;
}
ptr_energy_val++;
}
if (harmonic_sbr == 1) {
ptr_hbe_real += 64;
ptr_hbe_imag += 64;
} else {
ptr_real += 64;
ptr_imag += 64;
}
k--;
}
} else {
FLOAT32 *ptr_real = &ptr_real_values[0][0];
Expand Down
15 changes: 6 additions & 9 deletions encoder/ixheaace_sbr_ton_corr_hp.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ static VOID ixheaace_calc_auto_corr_second_order(ixheaace_acorr_coeffs *pstr_ac,

VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_corr,
FLOAT32 **ptr_real, FLOAT32 **ptr_imag, WORD32 usb,
WORD32 num_time_slots, WORD32 is_ld_sbr) {
WORD32 num_time_slots, WORD32 time_step) {
WORD32 i, k, r, time_index;
FLOAT32 alphar[2], alphai[2], r01r, r02r, r11r, r12r, r01i, r02i, r12i, det, r00r;
ixheaace_acorr_coeffs ac;
Expand All @@ -129,7 +129,7 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
WORD32 no_est_per_frame = pstr_ton_corr->est_cnt_per_frame;
WORD32 move = pstr_ton_corr->move;
WORD32 num_qmf_ch = pstr_ton_corr->num_qmf_ch;
WORD32 len = num_time_slots;
WORD32 len;
WORD32 qm_len;
for (i = 0; i < move; i++) {
memcpy(ptr_quota_mtx[i], ptr_quota_mtx[i + no_est_per_frame],
Expand All @@ -139,12 +139,9 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
memmove(ptr_energy_vec, ptr_energy_vec + no_est_per_frame, move * sizeof(ptr_energy_vec[0]));
memset(ptr_energy_vec + start_index_matrix, 0,
(tot_no_est - start_index_matrix) * sizeof(ptr_energy_vec[0]));
if (is_ld_sbr) {
len = num_time_slots / 2;
qm_len = 2 + len;
} else {
qm_len = 18;
}

len = (num_time_slots * time_step) / 2;
qm_len = 2 + len;

for (r = 0; r < usb; r++) {
k = 2;
Expand Down Expand Up @@ -185,7 +182,7 @@ VOID ixheaace_calculate_tonality_quotas(ixheaace_pstr_sbr_ton_corr_est pstr_ton_
}
ptr_energy_vec[time_index] += r00r;

k += is_ld_sbr ? len : 16;
k += len;

time_index++;
}
Expand Down
Loading
Loading