// File: Av1an/av1an-core/src/parse.rs (Rust, 476 lines, 16 KiB)

//! Functions for parsing frames from encoder output.
//!
//! Some functions are optimized with SIMD, and need
//! runtime detection that the corresponding feature
//! set is available before calling them.
use std::borrow::Cow;
use std::collections::HashSet;
use crate::encoder::Encoder;
// Placeholder for the start of an aomenc/vpxenc progress line; only its
// *length* is used by the parsers in this file (each "x" stands for a
// character that varies from line to line).
//
// We can safely always ignore this prefix, as the second number will
// always be at or after the end of this prefix. See the examples of aomenc
// output below to see why this is the case.
#[rustfmt::skip]
const AOM_VPX_IGNORED_PREFIX: &str =
"Pass x/x frame x/";
// In every sample the caret marks the byte at index
// `AOM_VPX_IGNORED_PREFIX.len()`, i.e. the first byte that is not skipped.
//
// Pass 1/1 frame 3/2 2131B 5997 us 500.25 fps [ETA unknown]
//                  ^ relevant output starts at this character
// Pass 1/1 frame 84/83 81091B 132314 us 634.85 fps [ETA unknown]
//                  ^
// Pass 1/1 frame 142/141 156465B 208875 us 679.83 fps [ETA unknown]
//                  ^
// Pass 1/1 frame 4232/4231 5622510B 5518075 us 766.93 fps [ETA unknown]
//                  ^
// Pass 1/1 frame 13380/13379 17860525B 16760 ms 798.31 fps [ETA unknown]
//                  ^
// Pass 1/1 frame 102262/102261 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F
//                  ^
// Pass 1/1 frame 1022621/1022611 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F
//                  ^
//
// As you can see, the number after the '/' (the one we want) never starts
// before the end of the ignored prefix.
/// Scalar parser for an aomenc/vpxenc progress line.
///
/// Returns the number that follows the `/` in the `frame A/B` pair, or `None`
/// when the line does not start with `Pass 1/1` or `Pass 2/2`, or when no
/// `/`-then-space delimited unsigned integer can be found past the ignored
/// prefix.
pub fn parse_aom_vpx_frames(s: &str) -> Option<u64> {
    // Only final-pass lines carry a frame count we care about.
    if !s.starts_with("Pass 2/2") && !s.starts_with("Pass 1/1") {
        return None;
    }

    // The numbers for aomenc/vpxenc are buffered/encoded frames, so we want
    // the second number (actual encoded frames). The search for the
    // separating '/' starts at the last byte of the ignored prefix.
    let search_from = AOM_VPX_IGNORED_PREFIX.len() - 1;
    let slash_offset = s
        .as_bytes()
        .get(search_from..)?
        .iter()
        .position(|&byte| byte == b'/')?;

    // First byte after the '/' and everything that follows it.
    let number_start = AOM_VPX_IGNORED_PREFIX.len() + slash_offset;
    let tail = s.get(number_start..)?;

    // The number runs up to (but not including) the next space.
    let number_len = tail.as_bytes().iter().position(|&byte| byte == b' ')?;

    tail.get(..number_len)?.parse().ok()
}
/// x86 SIMD implementation of parsing aomenc/vpxenc output using
/// SSSE3+SSE4.1, returning the number of frames processed, or `None`
/// if the input did not match.
///
/// This function also works for parsing vpxenc output, as its progress
/// printing is exactly the same.
///
/// # Safety
///
/// The CPU must support SSSE3 and SSE4.1.
#[inline]
#[target_feature(enable = "ssse3,sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub unsafe fn parse_aom_vpx_frames_sse41(s: &[u8]) -> Option<u64> {
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem::transmute;
// This implementation matches the *second* number in the output. Ex:
// Pass 1/1 frame 142/141 156465B 208875 us 679.83 fps [ETA unknown]
// ^^^
// matches this number and returns `Some(141)`
//
// Pass 1/1 frame 102262/102261 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F
// ^^^^^^
// matches this number and returns `Some(102261)`
//
// If invalid input is detected, this function returns `None`.
// We cheat in this implementation by taking a mutable slice to the string
// so we can reuse its allocation to add padding zeroes for free.
// Number of bytes processed (size in bytes of xmm register)
//
// There is no benefit to using wider SIMD lanes in this case, so we just
// use the most commonly available SIMD width. This is because we want
// to parse the fewest number of bytes possible to get the correct result.
const CHUNK_SIZE: usize = 16;
// This implementation needs to read `CHUNK_SIZE` bytes past the ignored
// prefix, so we pay the cost of the bounds check only once at the start
// of this function. This also serves as an input validation check.
if s.len() < AOM_VPX_IGNORED_PREFIX.len() + CHUNK_SIZE {
return None;
}
// Sanity check to see if we should parse the line. Processing invalid input
// anyway would result in returning a garbage value, ultimately causing the
// frame counter to be completely off.
if !(s.starts_with(b"Pass 2/2") || s.starts_with(b"Pass 1/1")) {
return None;
}
// Since the aomenc output follows a particular pattern, we can calculate the
// position of the '/' character from the index of the first space (how to
// do so is explained later on). We create this mask to find the first space
// in the output.
let spaces = _mm_set1_epi8(b' ' as i8);
// Load the relevant part of the output, which are the 16 bytes after the ignored prefix.
// This is safe because we already asserted that at least `IGNORED_PREFIX.len() + CHUNK_SIZE`
// bytes are available, and `_mm_loadu_si128` loads `CHUNK_SIZE` (16) bytes.
let relevant_output = _mm_loadu_si128(
s.get_unchecked(AOM_VPX_IGNORED_PREFIX.len()..)
.as_ptr()
.cast(),
);
// Compare the relevant output to spaces to create a mask where each bit
// is set to 1 if the corresponding character was a space, and 0 otherwise.
// The LSB corresponds to the match between the first characters.
//
// Only the lower 16 bits are relevant, as the rest are always set to 0.
let mask16 = _mm_movemask_epi8(_mm_cmpeq_epi8(relevant_output, spaces));
// The bits in the mask are set as so:
//
// "141 156465B 208875 us 679.83 fps [ETA unknown]"
// mask: 110000000111000
// ^^^
// These bits correspond to the first 3 characters: "141".
// Since they do not match the space, they are set to 0 in the mask.
// As printed, the leftmost bit is the most significant bit.
// ^^^
// These bits correspond to the 3 spaces after the "141".
//
// "2/102261 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F"
// mask: 100000000
// ^^^^^^^^
// These bits correspond to the first 8 characters: "2/102261".
//
// To get the index of the first space, we need to get the trailing zeros,
// which correspond to the first characters.
//
// This value is such that `relevant_output[first_space_index]` gives the
// actual first space character.
let first_space_index = mask16.trailing_zeros() as usize;
let first_digit_index = {
_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(s.get(AOM_VPX_IGNORED_PREFIX.len() - 1..)?.as_ptr().cast()),
_mm_set1_epi8(b'/' as i8),
))
.trailing_zeros() as usize
};
let ascending = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let num_digits = first_space_index - first_digit_index;
let dynamic_mask = _mm_cmplt_epi8(ascending, _mm_set1_epi8(num_digits as i8));
// At this point, we have done all the setup and can use the actual SIMD integer
// parsing algorithm. The description of the algorithm can be found here:
// https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
let mut chunk = _mm_loadu_si128(
s.as_ptr()
.add(AOM_VPX_IGNORED_PREFIX.len() + first_space_index - CHUNK_SIZE)
2021-11-20 18:05:25 +00:00
.cast(),
);
let zeros = _mm_set1_epi8(b'0' as i8);
chunk = _mm_sub_epi8(chunk, zeros);
// Mask out the irrelevant bits, effectively parsing them as if they were 0.
chunk = _mm_and_si128(chunk, dynamic_mask);
let mult = _mm_set_epi8(1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10);
chunk = _mm_maddubs_epi16(chunk, mult);
let mult = _mm_set_epi16(1, 100, 1, 100, 1, 100, 1, 100);
chunk = _mm_madd_epi16(chunk, mult);
chunk = _mm_packus_epi32(chunk, chunk);
let mult = _mm_set_epi16(0, 0, 0, 0, 1, 10000, 1, 10000);
chunk = _mm_madd_epi16(chunk, mult);
let chunk = transmute::<_, [u64; 2]>(chunk);
2021-11-20 18:05:25 +00:00
Some(((chunk[0] & 0xffff_ffff) * 100_000_000) + (chunk[0] >> 32))
}
/// Extracts the frame count from a rav1e progress line.
///
/// Returns `None` when the line does not start with `encoded ` or when the
/// first whitespace-delimited token after it is not an unsigned integer.
pub fn parse_rav1e_frames(s: &str) -> Option<u64> {
    // Sample lines:
    //
    // encoded 1 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s
    // encoded 12207 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s
    const PREFIX: &str = "encoded ";

    let after_prefix = s.strip_prefix(PREFIX)?;
    let frame_count = after_prefix.split_ascii_whitespace().next()?;
    frame_count.parse().ok()
}
/// Extracts the frame number from an SVT-AV1 progress line such as
/// `Encoding frame 22 2.03 kbps 3.68 fps`.
///
/// Returns `None` when the line does not start with `Encoding frame` or when
/// the first whitespace-delimited token after it is not an unsigned integer.
pub fn parse_svt_av1_frames(s: &str) -> Option<u64> {
    const PREFIX: &str = "Encoding frame";

    let after_prefix = s.strip_prefix(PREFIX)?;
    let frame_number = after_prefix.split_ascii_whitespace().next()?;
    frame_number.parse().ok()
}
/// Extracts the frame count from a progress line whose first
/// whitespace-delimited token is the count, e.g.
/// `24 frames: 39.11 fps, 14.60 kb/s`.
///
/// Returns `None` when the line has no tokens or the first token is not an
/// unsigned integer.
pub fn parse_x26x_frames(s: &str) -> Option<u64> {
    let first_token = s.split_ascii_whitespace().next()?;
    first_token.parse().ok()
}
/// Returns the set of valid parameters given a help text for the given encoder
///
/// Every whitespace-delimited token of `help_text` that starts with `-`
/// (other than a bare `-` or `--`) is treated as a parameter candidate:
///
/// - For x265, a `-x/--long` token contributes both `-x` and the long form,
///   and a `--[no-]name` token contributes `--name` and `--no-name`.
/// - Otherwise the token is cut at the first character that is not ASCII
///   alphanumeric, `-` or `_` (so `--tune=<arg>` yields `--tune`); tokens
///   where that character is at index 0 or 1 are ignored.
/// - For vpx, a single trailing `-` is removed from tokens that contain no
///   such character.
#[must_use]
pub fn valid_params(help_text: &str, encoder: Encoder) -> HashSet<Cow<'_, str>> {
    // x265 has 292 parameters, which is the most of any encoder, so we round up
    // slightly just in case
    let mut params = HashSet::with_capacity(300);

    for token in help_text.split_ascii_whitespace() {
        if !token.starts_with('-') || token == "-" || token == "--" {
            continue;
        }

        // The part of the token that names the (long) parameter. For most
        // encoders this is the whole token.
        let mut s = token;

        if encoder == Encoder::x265 {
            // x265 does this: -m/--subme
            //        or even: -w/--[no-]weightp
            // So we record the short parameter here and let the long one
            // continue through the rest of the parsing below.
            if token.as_bytes().get(2) == Some(&b'/') {
                // Byte 0 is '-' and byte 2 is '/', both ASCII, so indices 2
                // and 3 are guaranteed to be char boundaries.
                let (short, long) = (&token[..2], &token[3..]);
                if long.starts_with("--") {
                    params.insert(Cow::Borrowed(short));
                    s = long;
                }
            }

            // Somehow x265 manages to have a buggy --help, where a single
            // option (--[no-]-hrd-concat) has an extra dash.
            if let Some(negatable) = s.strip_prefix("--[no-]") {
                let arg = negatable.strip_prefix('-').unwrap_or(negatable);
                params.insert(Cow::Owned(format!("--{}", arg)));
                params.insert(Cow::Owned(format!("--no-{}", arg)));
                continue;
            }
        }

        // aomenc outputs '--tune=<arg>' for example, so we have to find the
        // character from the left so as to not miss the leftmost char
        match s.find(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_')) {
            Some(idx) if idx > 1 => {
                params.insert(Cow::Borrowed(&s[..idx]));
            }
            // In some weird cases (like with x264) there may be a dash
            // followed by a non alphanumeric character, so we just ignore that.
            Some(_) => {}
            None => {
                // It's a little concerning how *two* encoders manage to have
                // buggy help output.
                let arg = if encoder == Encoder::vpx {
                    // vpxenc randomly truncates the "Vizier Rate Control
                    // Options" in the help output, which sometimes causes it
                    // to truncate at a dash, which breaks the tests if we
                    // don't do this. Not sure what the correct solution in
                    // this case is.
                    s.strip_suffix('-').unwrap_or(s)
                } else {
                    s
                };
                params.insert(Cow::Borrowed(arg));
            }
        }
    }

    params
}
#[cfg(test)]
mod tests {
    use crate::parse::*;

    /// Checks `valid_params` against the recorded help text and expected
    /// parameter list of every supported encoder.
    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn valid_params_works() {
        use std::borrow::Borrow;

        // For each encoder `$x`, reads `../tests/$x_help.txt` and
        // `../tests/$x_params.txt` and requires the parsed set to equal the
        // expected set. The two loops exist only to give a precise message
        // about which parameter is missing or unexpected.
        macro_rules! generate_tests {
            ($($x:ident),* $(,)?) => {
                $(
                    let returned: HashSet<String> = valid_params(include_str!(concat!("../tests/", stringify!($x), "_help.txt")), Encoder::$x)
                        .iter()
                        .map(|s| s.to_string())
                        .collect();
                    let expected: HashSet<String> = include_str!(concat!("../tests/", stringify!($x), "_params.txt"))
                        .split_ascii_whitespace()
                        .map(|s| s.to_string())
                        .collect();
                    for arg in expected.iter() {
                        assert!(returned.contains(Borrow::<str>::borrow(&**arg)), "expected '{}', but it was missing in return value (for encoder {})", arg, stringify!($x));
                    }
                    for arg in returned.iter() {
                        assert!(expected.contains(Borrow::<str>::borrow(&**arg)), "return value contains '{}', but it was not expected (for encoder {})", arg, stringify!($x));
                    }
                    assert_eq!(returned, expected);
                )*
            };
        }

        generate_tests!(aom, rav1e, svt_av1, vpx, x264, x265);
    }

    #[test]
    fn rav1e_parsing() {
        let test_cases = [
            (
                "encoded 1 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                Some(1),
            ),
            (
                "encoded 12 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                Some(12),
            ),
            (
                "encoded 122 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                Some(122),
            ),
            (
                "encoded 1220 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                Some(1220),
            ),
            (
                "encoded 12207 frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                Some(12207),
            ),
            ("invalid", None),
            (
                "encoded xxxx frames, 126.416 fps, 16.32 Kb/s, elap. time: 1m 36s",
                None,
            ),
        ];

        for (s, ans) in test_cases {
            assert_eq!(parse_rav1e_frames(s), ans);
        }
    }

    #[test]
    fn x26x_parsing() {
        let test_cases = [
            ("24 frames: 39.11 fps, 14.60 kb/s", Some(24)),
            ("240 frames: 39.11 fps, 14.60 kb/s", Some(240)),
            ("2445 frames: 39.11 fps, 14.60 kb/s", Some(2445)),
            ("24145 frames: 39.11 fps, 14.60 kb/s", Some(24145)),
            ("246434 frames: 39.11 fps, 14.60 kb/s", Some(246_434)),
            ("2448732 frames: 39.11 fps, 14.60 kb/s", Some(2_448_732)),
            ("invalid data", None),
            ("", None),
        ];

        for (s, ans) in test_cases {
            assert_eq!(parse_x26x_frames(s), ans);
        }
    }

    #[test]
    fn svt_av1_parsing() {
        let test_cases = [
            ("Encoding frame 0 1.08 kbps 2.00 fps", Some(0)),
            ("Encoding frame 7 1.08 kbps 2.00 fps", Some(7)),
            ("Encoding frame 22 2.03 kbps 3.68 fps", Some(22)),
            ("Encoding frame 7654 1.08 kbps 2.00 fps", Some(7654)),
            ("Encoding frame 72415 1.08 kbps 2.00 fps", Some(72415)),
            ("Encoding frame 778743 1.08 kbps 2.00 fps", Some(778_743)),
            (
                "Encoding frame 53298734 1.08 kbps 2.00 fps",
                Some(53_298_734),
            ),
            ("invalid input", None),
            ("", None),
        ];

        for (s, ans) in test_cases {
            assert_eq!(parse_svt_av1_frames(s), ans);
        }
    }

    /// Runs the same cases through the scalar parser on every target and,
    /// on x86 CPUs that report SSSE3 and SSE4.1, through the SIMD parser too.
    #[test]
    fn aom_vpx_parsing() {
        let test_cases = [
            (
                "Pass 1/1 frame 3/2 2131B 5997 us 500.25 fps [ETA unknown]",
                Some(2),
            ),
            (
                "Pass 2/2 frame 84/83 81091B 132314 us 634.85 fps [ETA unknown]",
                Some(83),
            ),
            (
                "Pass 1/1 frame 142/141 156465B 208875 us 679.83 fps [ETA unknown]",
                Some(141),
            ),
            (
                "Pass 1/2 frame 142/141 156465B 208875 us 679.83 fps [ETA unknown]",
                None,
            ),
            (
                "Pass 2/2 frame 4232/4231 5622510B 5518075 us 766.93 fps [ETA unknown]",
                Some(4231),
            ),
            (
                "Pass 1/1 frame 13380/13379 17860525B 16760 ms 798.31 fps [ETA unknown]",
                Some(13379),
            ),
            (
                "Pass 1/2 frame 13380/13379 17860525B 16760 ms 798.31 fps [ETA unknown]",
                None,
            ),
            ("invalid data", None),
            (
                "Pass 2/2 frame 102262/102261 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F",
                Some(102_261),
            ),
            (
                "Pass 1/1 frame 1022621/1022611 136473850B 131502 ms 777.65 fps [ETA unknown] 1272F",
                Some(1_022_611),
            ),
            (
                "Pass 1/1 frame 10000/9999 5319227B 8663074 us 1154.32 fps [ETA unknown]",
                Some(9999),
            ),
        ];

        // The SIMD parser only exists on x86 and may only be called when the
        // CPU reports the features it was compiled for.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("sse4.1") && is_x86_feature_detected!("ssse3") {
                for (s, ans) in test_cases {
                    assert_eq!(unsafe { parse_aom_vpx_frames_sse41(s.as_bytes()) }, ans);
                }
            }
        }

        for (s, ans) in test_cases {
            assert_eq!(parse_aom_vpx_frames(s), ans);
        }
    }
}