diff -Naur old/common/arm/dct-a.S new/common/arm/dct-a.S --- old/common/arm/dct-a.S 2020-06-10 15:10:38 -0600 +++ new/common/arm/dct-a.S 2020-04-09 06:36:22 -0600 @@ -725,15 +725,15 @@ vadd.s16 q3, q3, q15 vsub.s16 d17, d0, d1 @ b4 vadd.s16 d18, d2, d3 @ b1 - vsub.s16 d19, d2, d3 @ b5 + vsub.s16 d19, d2, d3 @ b5 vadd.s16 d20, d4, d5 @ b2 - vsub.s16 d21, d4, d5 @ b6 + vsub.s16 d21, d4, d5 @ b6 vadd.s16 d22, d6, d7 @ b3 - vsub.s16 d23, d6, d7 @ b7 + vsub.s16 d23, d6, d7 @ b7 vadd.s16 q0, q8, q9 @ b0 + b1, b4 + b5; a0, a2 - vsub.s16 q1, q8, q9 @ b0 - b1, b4 - b5; a4, a6 + vsub.s16 q1, q8, q9 @ b0 - b1, b4 - b5; a4, a6 vadd.s16 q2, q10, q11 @ b2 + b3, b6 + b7; a1, a3 - vsub.s16 q3, q10, q11 @ b2 - b3, b6 - b7; a5, a7 + vsub.s16 q3, q10, q11 @ b2 - b3, b6 - b7; a5, a7 vadd.s16 q8, q0, q2 @ a0 + a1, a2 + a3 vsub.s16 q9, q0, q2 @ a0 - a1, a2 - a3 diff -Naur old/common/frame.c new/common/frame.c --- old/common/frame.c 2020-06-10 15:10:38 -0600 +++ new/common/frame.c 2020-04-09 06:36:22 -0600 @@ -685,14 +685,12 @@ x264_pthread_mutex_unlock( &frame->mutex ); } -int x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) +void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ) { - int completed; x264_pthread_mutex_lock( &frame->mutex ); - while( (completed = frame->i_lines_completed) < i_lines_completed && i_lines_completed >= 0 ) + while( frame->i_lines_completed < i_lines_completed ) x264_pthread_cond_wait( &frame->cv, &frame->mutex ); x264_pthread_mutex_unlock( &frame->mutex ); - return completed; } void x264_threadslice_cond_broadcast( x264_t *h, int pass ) diff -Naur old/common/frame.h new/common/frame.h --- old/common/frame.h 2020-06-10 15:10:38 -0600 +++ new/common/frame.h 2020-04-09 06:36:22 -0600 @@ -251,7 +251,7 @@ #define x264_frame_cond_broadcast x264_template(frame_cond_broadcast) void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed ); #define x264_frame_cond_wait x264_template(frame_cond_wait) -int 
x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); +void x264_frame_cond_wait( x264_frame_t *frame, int i_lines_completed ); #define x264_frame_new_slice x264_template(frame_new_slice) int x264_frame_new_slice( x264_t *h, x264_frame_t *frame ); diff -Naur old/common/x86/x86inc.asm new/common/x86/x86inc.asm --- old/common/x86/x86inc.asm 2020-06-10 15:10:38 -0600 +++ new/common/x86/x86inc.asm 2020-04-09 06:36:22 -0600 @@ -356,7 +356,7 @@ %define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512))) %define high_mm_regs (16*cpuflag(avx512)) -%macro ALLOC_STACK 0-2 0, 0 ; stack_size, n_xmm_regs (for win64 only) +%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) %ifnum %1 %if %1 != 0 %assign %%pad 0 @@ -401,7 +401,7 @@ %endif %endmacro -%macro SETUP_STACK_POINTER 0-1 0 +%macro SETUP_STACK_POINTER 1 %ifnum %1 %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT %if %1 > 0 diff -Naur old/configure new/configure --- old/configure 2020-06-10 15:10:38 -0600 +++ new/configure 2020-04-09 06:36:22 -0600 @@ -541,7 +541,6 @@ CC="${CC-${cross_prefix}gcc}" STRIP="${STRIP-${cross_prefix}strip}" -STRINGS="${STRINGS-${cross_prefix}strings}" INSTALL="${INSTALL-install}" PKGCONFIG="${PKGCONFIG-${cross_prefix}pkg-config}" @@ -1018,10 +1017,10 @@ if [ $compiler = GNU ]; then echo "int i[2] = {0x42494745,0}; double f[2] = {0x1.0656e6469616ep+102,0};" > conftest.c $CC $CFLAGS conftest.c -c -o conftest.o 2>/dev/null || die "endian test failed" - if (${STRINGS} -a conftest.o | grep -q BIGE) && (${STRINGS} -a conftest.o | grep -q FPendian) ; then + if (${cross_prefix}strings -a conftest.o | grep -q BIGE) && (${cross_prefix}strings -a conftest.o | grep -q FPendian) ; then define WORDS_BIGENDIAN CPU_ENDIAN="big-endian" - elif !(${STRINGS} -a conftest.o | grep -q EGIB && ${STRINGS} -a conftest.o | grep -q naidnePF) ; then + elif !(${cross_prefix}strings -a conftest.o | grep -q EGIB && ${cross_prefix}strings -a 
conftest.o | grep -q naidnePF) ; then die "endian test failed" fi fi diff -Naur old/encoder/analyse.c new/encoder/analyse.c --- old/encoder/analyse.c 2020-06-10 15:10:38 -0600 +++ new/encoder/analyse.c 2020-04-09 06:36:22 -0600 @@ -359,8 +359,8 @@ for( int i = (h->sh.i_type == SLICE_TYPE_B); i >= 0; i-- ) for( int j = 0; j < h->i_ref[i]; j++ ) { - int completed = x264_frame_cond_wait( h->fref[i][j]->orig, thresh ); - thread_mvy_range = X264_MIN( thread_mvy_range, completed - pix_y ); + x264_frame_cond_wait( h->fref[i][j]->orig, thresh ); + thread_mvy_range = X264_MIN( thread_mvy_range, h->fref[i][j]->orig->i_lines_completed - pix_y ); } if( h->param.b_deterministic ) @@ -3869,7 +3869,7 @@ int ref = h->mb.cache.ref[l][x264_scan8[0]]; if( ref < 0 ) continue; - completed = x264_frame_cond_wait( h->fref[l][ ref >> MB_INTERLACED ]->orig, -1 ); + completed = h->fref[l][ ref >> MB_INTERLACED ]->orig->i_lines_completed; if( (h->mb.cache.mv[l][x264_scan8[15]][1] >> (2 - MB_INTERLACED)) + h->mb.i_mb_y*16 > completed ) { x264_log( h, X264_LOG_WARNING, "internal error (MV out of thread range)\n"); diff -Naur old/encoder/ratecontrol.c new/encoder/ratecontrol.c --- old/encoder/ratecontrol.c 2020-06-10 15:10:38 -0600 +++ new/encoder/ratecontrol.c 2020-04-09 06:36:22 -0600 @@ -7,7 +7,7 @@ * Michael Niedermayer * Gabriel Bouvigne * Fiona Glaser - * Måns Rullgård + * Måns Rullgård * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -2030,6 +2030,8 @@ x264_zone_t *zone = get_zone( h, frame_num ); // force I/B quants as a function of P quants + const double last_p_q = rcc->last_qscale_for[SLICE_TYPE_P]; + const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type]; if( pict_type == SLICE_TYPE_I ) { double iq = q; @@ -2048,7 +2050,7 @@ else if( pict_type == SLICE_TYPE_B ) { if( h->param.rc.f_pb_factor > 0 ) - q = rcc->last_qscale_for[rcc->last_non_b_pict_type]; + q = last_non_b_q;
if( !rce->kept_as_ref ) q *= fabs( h->param.rc.f_pb_factor ); } @@ -2056,7 +2058,7 @@ && rcc->last_non_b_pict_type == SLICE_TYPE_P && rce->tex_bits == 0 ) { - q = rcc->last_qscale_for[SLICE_TYPE_P]; + q = last_p_q; } /* last qscale / qdiff stuff */ diff -Naur old/tools/checkasm.c new/tools/checkasm.c --- old/tools/checkasm.c 2020-06-10 15:10:38 -0600 +++ new/tools/checkasm.c 2020-04-09 06:36:22 -0600 @@ -780,7 +780,7 @@ x264_emms(); res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt ); - if( fabs( res_c - res_a ) > 1e-5 ) + if( fabs( res_c - res_a ) > 1e-6 ) { ok = 0; fprintf( stderr, "ssim: %.7f != %.7f [FAILED]\n", res_c, res_a );