From 04786d26fd3f6bef23317c686b99e51e21a24aad Mon Sep 17 00:00:00 2001 From: Anders Martinsson Date: Wed, 11 Dec 2019 19:58:30 +0100 Subject: [PATCH] Add operation vectors for better encoding performance Using stored operation vectors when generating intermediate symbols makes encoding around three times faster (depends on block size). Signed-off-by: Anders Martinsson --- benches/codec_benchmark.rs | 6 +- benches/decode_benchmark.rs | 2 +- benches/encode_benchmark.rs | 30 +++++- benches/matrix_sparsity.rs | 2 +- src/decoder.rs | 37 +++++-- src/encoder.rs | 122 +++++++++++++++++++--- src/lib.rs | 2 + src/operation_vector.rs | 199 ++++++++++++++++++++++++++++++++++++ src/pi_solver.rs | 59 ++++++----- 9 files changed, 401 insertions(+), 58 deletions(-) create mode 100644 src/operation_vector.rs diff --git a/benches/codec_benchmark.rs b/benches/codec_benchmark.rs index e45b304..1440951 100644 --- a/benches/codec_benchmark.rs +++ b/benches/codec_benchmark.rs @@ -77,7 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) { "encode 10KB", Benchmark::new("", move |b| { b.iter(|| { - let encoder = SourceBlockEncoder::new(1, symbol_size, &encode_data); + let encoder = SourceBlockEncoder::new(1, symbol_size, &encode_data, None); return encoder.source_packets(); }) }) @@ -89,7 +89,7 @@ fn criterion_benchmark(c: &mut Criterion) { "roundtrip 10KB", Benchmark::new("", move |b| { b.iter(|| { - let encoder = SourceBlockEncoder::new(1, symbol_size, &roundtrip_data); + let encoder = SourceBlockEncoder::new(1, symbol_size, &roundtrip_data, None); let mut decoder = SourceBlockDecoder::new(1, symbol_size, elements as u64); return decoder.decode(encoder.source_packets()); }) @@ -102,7 +102,7 @@ fn criterion_benchmark(c: &mut Criterion) { "roundtrip repair 10KB", Benchmark::new("", move |b| { b.iter(|| { - let encoder = SourceBlockEncoder::new(1, symbol_size, &repair_data); + let encoder = SourceBlockEncoder::new(1, symbol_size, &repair_data, None); let repair_packets = (elements /
symbol_size as usize) as u32; let mut decoder = SourceBlockDecoder::new(1, symbol_size, elements as u64); return decoder.decode(encoder.repair_packets(0, repair_packets)); diff --git a/benches/decode_benchmark.rs b/benches/decode_benchmark.rs index 084f4cb..e077bd1 100644 --- a/benches/decode_benchmark.rs +++ b/benches/decode_benchmark.rs @@ -21,7 +21,7 @@ fn benchmark(symbol_size: u16, overhead: f64) -> u64 { } let iterations = TARGET_TOTAL_BYTES / elements; - let encoder = SourceBlockEncoder::new(1, symbol_size, &data); + let encoder = SourceBlockEncoder::new(1, symbol_size, &data, None); let elements_and_overhead = (symbol_count as f64 * (1.0 + overhead)) as u32; let mut packets = encoder.repair_packets(0, (iterations as u32 * elements_and_overhead) as u32); diff --git a/benches/encode_benchmark.rs b/benches/encode_benchmark.rs index 4d4432e..d4ae726 100644 --- a/benches/encode_benchmark.rs +++ b/benches/encode_benchmark.rs @@ -1,5 +1,6 @@ use rand::Rng; use raptorq::SourceBlockEncoder; +use raptorq::SourceBlockEncoderCache; use std::time::Instant; const TARGET_TOTAL_BYTES: usize = 128 * 1024 * 1024; @@ -11,10 +12,8 @@ fn black_box(value: u64) { } } -fn main() { +fn benchmark(symbol_size: u16, cache: Option<&SourceBlockEncoderCache>) -> u64 { let mut black_box_value = 0; - let symbol_size = 1280; - println!("Symbol size: {} bytes", symbol_size); for symbol_count in SYMBOL_COUNTS.iter() { let elements = symbol_count * symbol_size as usize; let mut data: Vec = vec![0; elements]; @@ -22,10 +21,15 @@ fn main() { data[i] = rand::thread_rng().gen(); } + if cache.is_some() { + // Create and store the operation vector to measure performance when the cache is in use for all blocks. 
+ SourceBlockEncoder::new(1, symbol_size, &data, cache); + } + let now = Instant::now(); let iterations = TARGET_TOTAL_BYTES / elements; for _ in 0..iterations { - let encoder = SourceBlockEncoder::new(1, symbol_size, &data); + let encoder = SourceBlockEncoder::new(1, symbol_size, &data, cache); let packets = encoder.repair_packets(0, 1); black_box_value += packets[0].data()[0] as u64; } @@ -40,5 +44,21 @@ fn main() { throughput ); } - black_box(black_box_value); + return black_box_value; +} + +fn main() { + let symbol_size = 1280; + println!( + "Symbol size: {} bytes (without operation vectors)", + symbol_size + ); + black_box(benchmark(symbol_size, None)); + println!(); + let cache = SourceBlockEncoderCache::new(); + println!( + "Symbol size: {} bytes (with operation vectors)", + symbol_size + ); + black_box(benchmark(symbol_size, Some(&cache))); } diff --git a/benches/matrix_sparsity.rs b/benches/matrix_sparsity.rs index 14210c5..59c11cd 100644 --- a/benches/matrix_sparsity.rs +++ b/benches/matrix_sparsity.rs @@ -56,7 +56,7 @@ fn main() { ); let symbols = vec![Symbol::zero(1usize); a.width()]; - let mut decoder = IntermediateSymbolDecoder::new(a, hdpc, symbols, num_symbols); + let mut decoder = IntermediateSymbolDecoder::new(a, hdpc, symbols, num_symbols, false); println!( "Initial memory usage: {}KB", decoder.get_non_symbol_bytes() / 1024 diff --git a/src/decoder.rs b/src/decoder.rs index f7db11c..bba3b93 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -156,9 +156,10 @@ impl SourceBlockDecoder { hdpc_rows, symbols, self.source_block_symbols, + false, ) { - None => return None, - Some(s) => s, + (None, _) => return None, + (Some(s), _) => s, }; let mut result = vec![]; @@ -295,6 +296,7 @@ mod codec_tests { use crate::Encoder; use crate::SourceBlockDecoder; use crate::SourceBlockEncoder; + use crate::SourceBlockEncoderCache; use rand::seq::SliceRandom; use rand::Rng; @@ -375,7 +377,7 @@ mod codec_tests { println!("Completed {} symbols", symbol_count) } - let 
encoder = SourceBlockEncoder::new(1, symbol_size as u16, &data); + let encoder = SourceBlockEncoder::new(1, symbol_size as u16, &data, None); let mut decoder = SourceBlockDecoder::new(1, symbol_size as u16, elements as u64); decoder.set_sparse_threshold(sparse_threshold); @@ -393,26 +395,43 @@ mod codec_tests { #[test] #[ignore] fn repair_dense_extended() { - repair(99_999, 5000, true); + repair(99_999, 5000, true, None); } #[test] #[ignore] fn repair_sparse_extended() { - repair(0, 56403, true); + repair(0, 56403, true, None); } #[test] fn repair_dense() { - repair(99_999, 50, false); + repair(99_999, 50, false, None); } #[test] fn repair_sparse() { - repair(0, 50, false); + repair(0, 50, false, None); } - fn repair(sparse_threshold: u32, max_symbols: usize, progress: bool) { + #[test] + fn repair_dense_cache() { + let cache = SourceBlockEncoderCache::new(); + repair(99_999, 50, false, Some(&cache)); + } + + #[test] + fn repair_sparse_cache() { + let cache = SourceBlockEncoderCache::new(); + repair(0, 50, false, Some(&cache)); + } + + fn repair( + sparse_threshold: u32, + max_symbols: usize, + progress: bool, + cache: Option<&SourceBlockEncoderCache>, + ) { let symbol_size = 8; for symbol_count in 1..=max_symbols { let elements = symbol_size * symbol_count; @@ -425,7 +444,7 @@ mod codec_tests { println!("[repair] Completed {} symbols", symbol_count) } - let encoder = SourceBlockEncoder::new(1, 8, &data); + let encoder = SourceBlockEncoder::new(1, 8, &data, cache); let mut decoder = SourceBlockDecoder::new(1, 8, elements as u64); decoder.set_sparse_threshold(sparse_threshold); diff --git a/src/encoder.rs b/src/encoder.rs index 56f0dc0..934e8fa 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -4,6 +4,7 @@ use crate::base::EncodingPacket; use crate::base::PayloadId; use crate::constraint_matrix::generate_constraint_matrix; use crate::matrix::DenseBinaryMatrix; +use crate::operation_vector::{perform_op, SymbolOps}; use crate::pi_solver::fused_inverse_mul_symbols; 
use crate::sparse_matrix::SparseBinaryMatrix; use crate::symbol::Symbol; @@ -16,6 +17,8 @@ use crate::systematic_constants::num_pi_symbols; use crate::systematic_constants::{calculate_p1, systematic_index}; use crate::ObjectTransmissionInformation; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; pub const SPARSE_MATRIX_THRESHOLD: u32 = 250; @@ -39,6 +42,7 @@ impl Encoder { assert_eq!(1, config.sub_blocks()); // let (tl, ts, nl, ns) = partition((config.symbol_size() / config.alignment() as u16) as u32, config.sub_blocks()); + let cache = SourceBlockEncoderCache::new(); let mut data_index = 0; let mut blocks = vec![]; for i in 0..zl { @@ -47,6 +51,7 @@ impl Encoder { i as u8, config.symbol_size(), &data[data_index..(data_index + offset)], + Some(&cache), )); data_index += offset; } @@ -58,6 +63,7 @@ impl Encoder { i as u8, config.symbol_size(), &data[data_index..(data_index + offset)], + Some(&cache), )); } else { // Should only be possible when Kt * T > F. 
See third to last paragraph in section 4.4.1.2 @@ -72,6 +78,7 @@ impl Encoder { i as u8, config.symbol_size(), &padded, + Some(&cache), )); } data_index += offset; @@ -98,6 +105,18 @@ impl Encoder { } } +#[derive(Default)] +pub struct SourceBlockEncoderCache { + cache: Arc>>>, +} + +impl SourceBlockEncoderCache { + pub fn new() -> SourceBlockEncoderCache { + let cache = Arc::new(RwLock::new(HashMap::new())); + SourceBlockEncoderCache { cache } + } +} + #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct SourceBlockEncoder { source_block_id: u8, @@ -106,17 +125,60 @@ pub struct SourceBlockEncoder { } impl SourceBlockEncoder { - pub fn new(source_block_id: u8, symbol_size: u16, data: &[u8]) -> SourceBlockEncoder { + pub fn new( + source_block_id: u8, + symbol_size: u16, + data: &[u8], + cache: Option<&SourceBlockEncoderCache>, + ) -> SourceBlockEncoder { assert_eq!(data.len() % symbol_size as usize, 0); let source_symbols: Vec = data .chunks(symbol_size as usize) .map(|x| Symbol::new(Vec::from(x))) .collect(); - let intermediate_symbols = gen_intermediate_symbols( - &source_symbols, - symbol_size as usize, - SPARSE_MATRIX_THRESHOLD, - ); + + let intermediate_symbols = match cache { + Some(c) => { + let key = source_symbols.len(); + let read_map = c.cache.read().unwrap(); + let value = read_map.get(&key); + + match value { + None => { + drop(read_map); + let (is, ops_vec) = gen_intermediate_symbols( + &source_symbols, + symbol_size as usize, + SPARSE_MATRIX_THRESHOLD, + true, + ); + let mut write_map = c.cache.write().unwrap(); + write_map.insert(key, ops_vec.unwrap()); + drop(write_map); + is.unwrap() + } + Some(operation_vector) => { + let is = gen_intermediate_symbols_ops_vec( + &source_symbols, + symbol_size as usize, + &(*operation_vector), + ); + drop(read_map); + is + } + } + } + None => { + let (is, _ops_vec) = gen_intermediate_symbols( + &source_symbols, + symbol_size as usize, + SPARSE_MATRIX_THRESHOLD, + false, + ); + is.unwrap() + } + 
}; + SourceBlockEncoder { source_block_id, source_symbols, @@ -162,17 +224,15 @@ impl SourceBlockEncoder { } } -// See section 5.3.3.4 #[allow(non_snake_case)] -fn gen_intermediate_symbols( +fn create_d( source_block: &[Symbol], symbol_size: usize, - sparse_threshold: u32, + extended_source_symbols: usize, ) -> Vec { let L = num_intermediate_symbols(source_block.len() as u32); let S = num_ldpc_symbols(source_block.len() as u32); let H = num_hdpc_symbols(source_block.len() as u32); - let extended_source_symbols = extended_source_block_symbols(source_block.len() as u32); let mut D = Vec::with_capacity(L as usize); for _ in 0..(S + H) { @@ -186,19 +246,47 @@ fn gen_intermediate_symbols( D.push(Symbol::zero(symbol_size)); } assert_eq!(D.len(), L as usize); + D +} + +// See section 5.3.3.4 +#[allow(non_snake_case)] +fn gen_intermediate_symbols( + source_block: &[Symbol], + symbol_size: usize, + sparse_threshold: u32, + store_operations: bool, +) -> (Option>, Option>) { + let extended_source_symbols = extended_source_block_symbols(source_block.len() as u32); + let D = create_d(source_block, symbol_size, extended_source_symbols as usize); let indices: Vec = (0..extended_source_symbols).collect(); if extended_source_symbols >= sparse_threshold { let (A, hdpc) = generate_constraint_matrix::(extended_source_symbols, &indices); - return fused_inverse_mul_symbols(A, hdpc, D, extended_source_symbols).unwrap(); + return fused_inverse_mul_symbols(A, hdpc, D, extended_source_symbols, store_operations); } else { let (A, hdpc) = generate_constraint_matrix::(extended_source_symbols, &indices); - return fused_inverse_mul_symbols(A, hdpc, D, extended_source_symbols).unwrap(); + return fused_inverse_mul_symbols(A, hdpc, D, extended_source_symbols, store_operations); } } +#[allow(non_snake_case)] +fn gen_intermediate_symbols_ops_vec( + source_block: &[Symbol], + symbol_size: usize, + operation_vector: &[SymbolOps], +) -> Vec { + let extended_source_symbols = 
extended_source_block_symbols(source_block.len() as u32); + let mut D = create_d(source_block, symbol_size, extended_source_symbols as usize); + + for op in operation_vector { + perform_op(op, &mut D); + } + D +} + // Enc[] function, as defined in section 5.3.5.3 #[allow(clippy::many_single_char_names)] fn enc( @@ -287,8 +375,10 @@ mod tests { fn enc_constraint(sparse_threshold: u32) { let source_symbols = gen_test_symbols(); - let intermediate_symbols = - gen_intermediate_symbols(&source_symbols, SYMBOL_SIZE, sparse_threshold); + + let (is, _ops_vec) = + gen_intermediate_symbols(&source_symbols, SYMBOL_SIZE, sparse_threshold, false); + let intermediate_symbols = is.unwrap(); let lt_symbols = num_lt_symbols(NUM_SYMBOLS); let sys_index = systematic_index(NUM_SYMBOLS); @@ -313,7 +403,9 @@ mod tests { #[allow(non_snake_case)] fn ldpc_constraint(sparse_threshold: u32) { - let C = gen_intermediate_symbols(&gen_test_symbols(), SYMBOL_SIZE, sparse_threshold); + let (is, _ops_vec) = + gen_intermediate_symbols(&gen_test_symbols(), SYMBOL_SIZE, sparse_threshold, false); + let C = is.unwrap(); let S = num_ldpc_symbols(NUM_SYMBOLS) as usize; let P = num_pi_symbols(NUM_SYMBOLS) as usize; let W = num_lt_symbols(NUM_SYMBOLS) as usize; diff --git a/src/lib.rs b/src/lib.rs index dadc5dd..7a8260a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ mod matrix; mod octet; mod octet_matrix; mod octets; +mod operation_vector; mod pi_solver; mod rng; mod sparse_matrix; @@ -26,6 +27,7 @@ pub use crate::decoder::Decoder; pub use crate::decoder::SourceBlockDecoder; pub use crate::encoder::Encoder; pub use crate::encoder::SourceBlockEncoder; +pub use crate::encoder::SourceBlockEncoderCache; #[cfg(feature = "benchmarking")] pub use crate::constraint_matrix::generate_constraint_matrix; diff --git a/src/operation_vector.rs b/src/operation_vector.rs new file mode 100644 index 0000000..d565ddc --- /dev/null +++ b/src/operation_vector.rs @@ -0,0 +1,199 @@ +use crate::octet::Octet; +use 
crate::symbol::Symbol; +use crate::util::get_both_indices; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)] +pub enum SymbolOps { + AddAssign { + dest: usize, + src: usize, + }, + MulAssign { + dest: usize, + scalar: Octet, + }, + FMA { + dest: usize, + src: usize, + scalar: Octet, + }, + Reorder { + order: Vec, + }, +} + +pub fn perform_op(op: &SymbolOps, symbols: &mut Vec) { + match op { + SymbolOps::AddAssign { dest, src } => { + let (dest, temp) = get_both_indices(symbols, *dest, *src); + *dest += temp; + } + SymbolOps::MulAssign { dest, scalar } => { + symbols[*dest].mulassign_scalar(scalar); + } + SymbolOps::FMA { dest, src, scalar } => { + let (dest, temp) = get_both_indices(symbols, *dest, *src); + dest.fused_addassign_mul_scalar(temp, scalar); + } + SymbolOps::Reorder { order } => { + /* TODO: Reorder is the last step of the algorithm. It should be + * possible to move reorder to be the first step and use when + * creating D (place all rows in correct position before + * calculations). This will however force an update on all + * row-numbers used in all other "Operations". 
*/ + let mut temp_symbols: Vec> = symbols.drain(..).map(Some).collect(); + for row_index in order.iter() { + symbols.push(temp_symbols[*row_index].take().unwrap()); + } + } + } +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + use crate::octet::Octet; + use crate::operation_vector::{perform_op, SymbolOps}; + use crate::symbol::Symbol; + + #[test] + fn test_add() { + let rows = 2; + let symbol_size = 1316; + let mut data: Vec = Vec::with_capacity(rows); + + for _i in 0..rows { + let mut symbol_data: Vec = vec![0; symbol_size]; + for j in 0..symbol_size { + symbol_data[j] = rand::thread_rng().gen(); + } + let symbol = Symbol::new(symbol_data); + data.push(symbol); + } + + let mut data0: Vec = vec![0; symbol_size]; + let mut data1: Vec = vec![0; symbol_size]; + let mut result: Vec = vec![0; symbol_size]; + for i in 0..symbol_size { + data0[i] = data[0].as_bytes()[i]; + data1[i] = data[1].as_bytes()[i]; + result[i] = data0[i] ^ data1[i]; + } + let mut symbol0 = Symbol::new(data0); + let symbol1 = Symbol::new(data1); + + symbol0 += &symbol1; + + perform_op(&SymbolOps::AddAssign { dest: 0, src: 1 }, &mut data); + assert_eq!(result, data[0].as_bytes()); + } + + #[test] + fn test_add_mul() { + let rows = 2; + let symbol_size = 1316; + let mut data: Vec = Vec::with_capacity(rows); + + for _i in 0..rows { + let mut symbol_data: Vec = vec![0; symbol_size]; + for j in 0..symbol_size { + symbol_data[j] = rand::thread_rng().gen(); + } + let symbol = Symbol::new(symbol_data); + data.push(symbol); + } + + let value = 173; + let mut data0: Vec = vec![0; symbol_size]; + let mut data1: Vec = vec![0; symbol_size]; + let mut result: Vec = vec![0; symbol_size]; + for i in 0..symbol_size { + data0[i] = data[0].as_bytes()[i]; + data1[i] = data[1].as_bytes()[i]; + result[i] = data0[i] ^ (Octet::new(data1[i]) * Octet::new(value)).byte(); + } + + perform_op( + &SymbolOps::FMA { + dest: 0, + src: 1, + scalar: Octet::new(value), + }, + &mut data, + ); + assert_eq!(result, 
data[0].as_bytes()); + } + + #[test] + fn test_mul() { + let rows = 1; + let symbol_size = 1316; + let mut data: Vec = Vec::with_capacity(rows); + + for _i in 0..rows { + let mut symbol_data: Vec = vec![0; symbol_size]; + for j in 0..symbol_size { + symbol_data[j] = rand::thread_rng().gen(); + } + let symbol = Symbol::new(symbol_data); + data.push(symbol); + } + + let value = 215; + let mut data0: Vec = vec![0; symbol_size]; + let mut result: Vec = vec![0; symbol_size]; + for i in 0..symbol_size { + data0[i] = data[0].as_bytes()[i]; + result[i] = (Octet::new(data0[i]) * Octet::new(value)).byte(); + } + + perform_op( + &SymbolOps::MulAssign { + dest: 0, + scalar: Octet::new(value), + }, + &mut data, + ); + assert_eq!(result, data[0].as_bytes()); + } + + #[test] + fn test_reorder() { + let rows = 10; + let symbol_size = 10; + let mut data: Vec = Vec::with_capacity(rows); + + for i in 0..rows { + let mut symbol_data: Vec = vec![0; symbol_size]; + for j in 0..symbol_size { + symbol_data[j] = i as u8; + } + let symbol = Symbol::new(symbol_data); + data.push(symbol); + } + + assert_eq!(data[0].as_bytes()[0], 0); + assert_eq!(data[1].as_bytes()[0], 1); + assert_eq!(data[2].as_bytes()[0], 2); + assert_eq!(data[9].as_bytes()[0], 9); + + perform_op( + &SymbolOps::Reorder { + order: vec![9, 7, 5, 3, 1, 8, 0, 6, 2, 4], + }, + &mut data, + ); + assert_eq!(data[0].as_bytes()[0], 9); + assert_eq!(data[1].as_bytes()[0], 7); + assert_eq!(data[2].as_bytes()[0], 5); + assert_eq!(data[3].as_bytes()[0], 3); + assert_eq!(data[4].as_bytes()[0], 1); + assert_eq!(data[5].as_bytes()[0], 8); + assert_eq!(data[6].as_bytes()[0], 0); + assert_eq!(data[7].as_bytes()[0], 6); + assert_eq!(data[8].as_bytes()[0], 2); + assert_eq!(data[9].as_bytes()[0], 4); + } +} diff --git a/src/pi_solver.rs b/src/pi_solver.rs index 7652fa6..c340d18 100644 --- a/src/pi_solver.rs +++ b/src/pi_solver.rs @@ -3,6 +3,7 @@ use crate::arraymap::{U16ArrayMap, U32VecMap}; use crate::matrix::BinaryMatrix; use 
crate::octet::Octet; use crate::octet_matrix::DenseOctetMatrix; +use crate::operation_vector::SymbolOps; use crate::symbol::Symbol; use crate::systematic_constants::num_hdpc_symbols; use crate::systematic_constants::num_intermediate_symbols; @@ -12,23 +13,6 @@ use crate::util::get_both_indices; use serde::{Deserialize, Serialize}; use std::mem::size_of; -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)] -enum SymbolOps { - AddAssign { - dest: usize, - src: usize, - }, - MulAssign { - dest: usize, - scalar: Octet, - }, - FMA { - dest: usize, - src: usize, - scalar: Octet, - }, -} - #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)] struct FirstPhaseRowSelectionStats { original_degree: U16ArrayMap, @@ -362,6 +346,7 @@ pub struct IntermediateSymbolDecoder { // Operations on D are deferred to the end of the codec to improve cache hits deferred_D_ops: Vec, num_source_symbols: u32, + store_operations: bool, debug_symbol_mul_ops: u32, debug_symbol_add_ops: u32, debug_symbol_mul_ops_by_phase: Vec, @@ -375,6 +360,7 @@ impl IntermediateSymbolDecoder { hdpc_rows: DenseOctetMatrix, symbols: Vec, num_source_symbols: u32, + store_operations: bool, ) -> IntermediateSymbolDecoder { assert!(matrix.width() <= symbols.len()); assert_eq!(matrix.height(), symbols.len()); @@ -411,6 +397,7 @@ impl IntermediateSymbolDecoder { L: intermediate_symbols, deferred_D_ops: Vec::with_capacity(70 * intermediate_symbols), num_source_symbols, + store_operations, debug_symbol_mul_ops: 0, debug_symbol_add_ops: 0, debug_symbol_mul_ops_by_phase: vec![0; 5], @@ -446,6 +433,7 @@ impl IntermediateSymbolDecoder { let (dest, temp) = get_both_indices(&mut self.D, dest, src); dest.fused_addassign_mul_scalar(&temp, &scalar); } + SymbolOps::Reorder { order: _order } => {} } } } @@ -1122,23 +1110,29 @@ impl IntermediateSymbolDecoder { } #[inline(never)] - pub fn execute(&mut self) -> Option> { + pub fn execute(&mut self) -> (Option>, Option>) 
{ self.X.disable_column_acccess_acceleration(); if !self.first_phase() { - return None; + return (None, None); } self.A.disable_column_acccess_acceleration(); if !self.second_phase() { - return None; + return (None, None); } self.third_phase(); self.fourth_phase(); self.fifth_phase(); + let mut operation_vector: Vec = if self.store_operations { + self.deferred_D_ops.clone() + } else { + Vec::with_capacity(0) + }; + self.apply_deferred_symbol_ops(); // See end of section 5.4.2.1 @@ -1157,7 +1151,16 @@ impl IntermediateSymbolDecoder { removable_D.push(None); result.push(removable_D.swap_remove(index_mapping[i]).unwrap()); } - Some(result) + if self.store_operations { + let mut reorder = Vec::with_capacity(self.L); + for i in index_mapping.iter().take(self.L) { + reorder.push(*i); + } + operation_vector.push(SymbolOps::Reorder { order: reorder }); + return (Some(result), Some(operation_vector)); + } + + (Some(result), None) } } @@ -1168,8 +1171,16 @@ pub fn fused_inverse_mul_symbols( hdpc_rows: DenseOctetMatrix, symbols: Vec, num_source_symbols: u32, -) -> Option> { - IntermediateSymbolDecoder::new(matrix, hdpc_rows, symbols, num_source_symbols).execute() + store_operations: bool, +) -> (Option>, Option>) { + IntermediateSymbolDecoder::new( + matrix, + hdpc_rows, + symbols, + num_source_symbols, + store_operations, + ) + .execute() } #[cfg(test)] @@ -1193,7 +1204,7 @@ mod tests { let indices: Vec = (0..num_symbols).collect(); let (a, hdpc) = generate_constraint_matrix::(num_symbols, &indices); let symbols = vec![Symbol::zero(1usize); a.width()]; - let mut decoder = IntermediateSymbolDecoder::new(a, hdpc, symbols, num_symbols); + let mut decoder = IntermediateSymbolDecoder::new(a, hdpc, symbols, num_symbols, false); decoder.execute(); assert!( (decoder.get_symbol_mul_ops() as f64 / num_symbols as f64) < expected_mul_ops,