// raptorq/src/encoder.rs

#[cfg(feature = "std")]
use std::vec::Vec;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use crate::base::intermediate_tuple;
use crate::base::partition;
use crate::base::EncodingPacket;
use crate::base::PayloadId;
use crate::constraint_matrix::generate_constraint_matrix;
use crate::matrix::DenseBinaryMatrix;
use crate::operation_vector::{perform_op, SymbolOps};
use crate::pi_solver::fused_inverse_mul_symbols;
use crate::sparse_matrix::SparseBinaryMatrix;
use crate::symbol::Symbol;
use crate::systematic_constants::extended_source_block_symbols;
use crate::systematic_constants::num_hdpc_symbols;
use crate::systematic_constants::num_intermediate_symbols;
use crate::systematic_constants::num_ldpc_symbols;
use crate::systematic_constants::num_lt_symbols;
use crate::systematic_constants::num_pi_symbols;
use crate::systematic_constants::{calculate_p1, systematic_index};
use crate::util::int_div_ceil;
use crate::ObjectTransmissionInformation;
#[cfg(feature = "serde_support")]
use serde::{Deserialize, Serialize};

/// Extended source symbol count at or above which `gen_intermediate_symbols` builds the
/// constraint matrix as a `SparseBinaryMatrix`; below it a `DenseBinaryMatrix` is used.
pub const SPARSE_MATRIX_THRESHOLD: u32 = 250;

/// Builder that derives an [`ObjectTransmissionInformation`] from a maximum packet size and a
/// decoder memory budget, and then constructs an [`Encoder`] for a given payload.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
pub struct EncoderBuilder {
    /// Decoder memory budget in bytes, forwarded to
    /// `ObjectTransmissionInformation::generate_encoding_parameters`.
    decoder_memory_requirement: u64,
    /// Maximum packet size in bytes, forwarded to
    /// `ObjectTransmissionInformation::generate_encoding_parameters`.
    max_packet_size: u16,
}

impl Default for EncoderBuilder {
    /// Returns the same builder as [`EncoderBuilder::new`].
    ///
    /// A derived `Default` would zero both fields, which disagrees with `new()`; delegating
    /// keeps the two constructors consistent.
    fn default() -> Self {
        Self::new()
    }
}

impl EncoderBuilder {
    /// Creates a builder with a 10 MiB decoder memory requirement and a maximum packet size
    /// of 1024 bytes.
    pub fn new() -> EncoderBuilder {
        EncoderBuilder {
            decoder_memory_requirement: 10 * 1024 * 1024,
            max_packet_size: 1024,
        }
    }

    /// Sets the decoder memory requirement, in bytes.
    pub fn set_decoder_memory_requirement(&mut self, bytes: u64) {
        self.decoder_memory_requirement = bytes;
    }

    /// Sets the maximum packet size, in bytes.
    pub fn set_max_packet_size(&mut self, bytes: u16) {
        self.max_packet_size = bytes;
    }

    /// Builds an [`Encoder`] for `data`.
    ///
    /// The encoding parameters are generated from the length of `data`, the configured maximum
    /// packet size and the configured decoder memory requirement.
    pub fn build(&self, data: &[u8]) -> Encoder {
        let config = ObjectTransmissionInformation::generate_encoding_parameters(
            data.len() as u64,
            self.max_packet_size,
            self.decoder_memory_requirement,
        );

        Encoder::new(data, config)
    }
}

/// Calculates the `[start, end)` byte ranges that split an object into source blocks.
///
/// With `Kt = ceil(transfer_length / symbol_size)` and
/// `(kl, ks, zl, zs) = partition(Kt, source_blocks)`, the result holds `zl` ranges of `kl`
/// symbols each, followed by `zs` ranges of `ks` symbols each, laid out back to back from
/// offset zero.
///
/// A returned range may extend past `data.len()`; such a block must be zero padded by the
/// caller.
///
/// # Panics
///
/// Panics if one of the trailing `zs` blocks extends past the end of `data` although
/// `Kt * symbol_size` does not exceed `data.len()`.
pub fn calculate_block_offsets(
    data: &[u8],
    config: &ObjectTransmissionInformation,
) -> Vec<(usize, usize)> {
    let kt = int_div_ceil(config.transfer_length(), config.symbol_size() as u64);
    let (kl, ks, zl, zs) = partition(kt, config.source_blocks());

    // Byte lengths of the two block sizes produced by the partition.
    let first_len = kl as usize * config.symbol_size() as usize;
    let second_len = ks as usize * config.symbol_size() as usize;

    let mut ranges = Vec::with_capacity((zl + zs) as usize);
    let mut cursor = 0;

    for _ in 0..zl {
        let end = cursor + first_len;
        ranges.push((cursor, end));
        cursor = end;
    }

    for _ in 0..zs {
        let end = cursor + second_len;
        if end > data.len() {
            // Should only be possible when Kt * T > F. See third to last paragraph in section 4.4.1.2
            assert!(kt as usize * config.symbol_size() as usize > data.len());
        }
        ranges.push((cursor, end));
        cursor = end;
    }

    ranges
}

/// Encoder for a complete object: the transmission parameters plus one
/// [`SourceBlockEncoder`] per source block.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
pub struct Encoder {
    // Transmission parameters the encoder was constructed with; returned by `get_config`.
    config: ObjectTransmissionInformation,
    // Per-block encoders, in the order of the ranges from `calculate_block_offsets`.
    blocks: Vec<SourceBlockEncoder>,
}

impl Encoder {
    /// Splits `data` into source blocks as described by `config` and creates one
    /// [`SourceBlockEncoder`] per block.
    ///
    /// A block whose byte range extends past the end of `data` is zero padded to its full
    /// length. The encoding plan is reused for consecutive blocks that contain the same number
    /// of source symbols and regenerated otherwise.
    ///
    /// # Panics
    ///
    /// Panics if a block starts beyond the end of `data`, or if any of the assertions in
    /// `calculate_block_offsets` or `SourceBlockEncoder::with_encoding_plan` fail.
    pub fn new(data: &[u8], config: ObjectTransmissionInformation) -> Encoder {
        let mut block_encoders = Vec::new();
        let mut cached_plan: Option<SourceBlockEncodingPlan> = None;
        for (i, (start, end)) in calculate_block_offsets(data, &config)
            .into_iter()
            .enumerate()
        {
            // Zero pad if necessary. `padded` is declared outside the `if` so that the
            // borrowed slice can outlive the branch that fills it.
            let padded;
            let block: &[u8] = if end > data.len() {
                let mut buffer = Vec::with_capacity(end - start);
                buffer.extend_from_slice(&data[start..]);
                buffer.resize(end - start, 0);
                padded = buffer;
                &padded
            } else {
                &data[start..end]
            };

            let symbol_count = block.len() / config.symbol_size() as usize;
            // Keep the previous plan only if it was generated for the same symbol count.
            let plan = cached_plan
                .take()
                .filter(|plan| plan.source_symbol_count == symbol_count as u16)
                .unwrap_or_else(|| SourceBlockEncodingPlan::generate(symbol_count as u16));
            let plan: &SourceBlockEncodingPlan = cached_plan.insert(plan);

            block_encoders.push(SourceBlockEncoder::with_encoding_plan(
                // Source block ids are stored as `u8`.
                i as u8,
                &config,
                block,
                plan,
            ));
        }

        Encoder {
            config,
            blocks: block_encoders,
        }
    }

    /// Creates an encoder using the parameters returned by
    /// `ObjectTransmissionInformation::with_defaults` for the length of `data` and the given
    /// `maximum_transmission_unit`.
    pub fn with_defaults(data: &[u8], maximum_transmission_unit: u16) -> Encoder {
        let config = ObjectTransmissionInformation::with_defaults(
            data.len() as u64,
            maximum_transmission_unit,
        );

        Encoder::new(data, config)
    }

    /// Returns a copy of the transmission parameters this encoder was built with.
    pub fn get_config(&self) -> ObjectTransmissionInformation {
        self.config
    }

    /// Returns, for every source block in order, all of its source packets followed by
    /// `repair_packets_per_block` repair packets starting at repair symbol id 0.
    pub fn get_encoded_packets(&self, repair_packets_per_block: u32) -> Vec<EncodingPacket> {
        let mut packets = Vec::new();
        for encoder in &self.blocks {
            packets.extend(encoder.source_packets());
            packets.extend(encoder.repair_packets(0, repair_packets_per_block));
        }
        packets
    }

    /// Returns the per-block encoders, one per source block.
    pub fn get_block_encoders(&self) -> &Vec<SourceBlockEncoder> {
        &self.blocks
    }
}

/// Precomputed list of symbol operations that turns a padded source block into its
/// intermediate symbols, valid for one specific source symbol count.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
pub struct SourceBlockEncodingPlan {
    // Operations recorded by `generate`; replayed in order by
    // `gen_intermediate_symbols_with_plan`.
    operations: Vec<SymbolOps>,
    // Number of source symbols the plan was generated for; checked by
    // `SourceBlockEncoder::with_encoding_plan`.
    source_symbol_count: u16,
}

impl SourceBlockEncodingPlan {
    /// Generates an encoding plan that is valid for any combination of data length and symbol
    /// size where `ceil(data_length / symbol_size) == symbol_count`.
    ///
    /// # Panics
    ///
    /// Panics if the solver does not return an operation list.
    pub fn generate(symbol_count: u16) -> SourceBlockEncodingPlan {
        // TODO: refactor pi_solver, so that we don't need this dummy data to generate a plan
        let dummy_block = vec![Symbol::new(vec![0]); symbol_count as usize];
        let recorded_ops = gen_intermediate_symbols(&dummy_block, 1, SPARSE_MATRIX_THRESHOLD).1;
        SourceBlockEncodingPlan {
            operations: recorded_ops.unwrap(),
            source_symbol_count: symbol_count,
        }
    }
}

/// Encoder for a single source block; produces the block's source and repair packets.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde_support", derive(Serialize, Deserialize))]
pub struct SourceBlockEncoder {
    // Block id placed in the `PayloadId` of every packet produced for this block.
    source_block_id: u8,
    // The block's data split into symbols; emitted unchanged by `source_packets`.
    source_symbols: Vec<Symbol>,
    // Intermediate symbols derived from the source symbols; input to `enc` for repair packets.
    intermediate_symbols: Vec<Symbol>,
}

impl SourceBlockEncoder {
    /// Splits the bytes of one source block into symbols of `config.symbol_size()` bytes.
    ///
    /// With more than one sub-block, the data is read sub-block by sub-block and every symbol
    /// is assembled by concatenating its sub-symbol from each sub-block. Otherwise the data is
    /// simply cut into consecutive chunks.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of the symbol size.
    fn create_symbols(config: &ObjectTransmissionInformation, data: &[u8]) -> Vec<Symbol> {
        let symbol_size = config.symbol_size() as usize;
        assert_eq!(data.len() % symbol_size, 0);
        if config.sub_blocks() > 1 {
            let alignment = config.symbol_alignment() as usize;
            let mut symbols = vec![vec![]; data.len() / symbol_size];
            let (tl, ts, nl, ns) = partition(
                (config.symbol_size() / config.symbol_alignment() as u16) as u32,
                config.sub_blocks(),
            );
            // Divide the block into sub-blocks and then concatenate the sub-symbols into symbols
            // See second to last paragraph in section 4.4.1.2.
            let mut offset = 0;
            for sub_block in 0..(nl + ns) {
                // The first `nl` sub-blocks use sub-symbols of `tl` alignment units, the
                // remaining `ns` sub-blocks use `ts` units.
                let bytes = if sub_block < nl {
                    tl as usize * alignment
                } else {
                    ts as usize * alignment
                };
                for symbol in &mut symbols {
                    symbol.extend_from_slice(&data[offset..offset + bytes]);
                    offset += bytes;
                }
            }
            assert_eq!(offset, data.len());
            symbols.into_iter().map(Symbol::new).collect()
        } else {
            data.chunks(symbol_size)
                .map(|chunk| Symbol::new(chunk.to_vec()))
                .collect()
        }
    }

    /// Creates an encoder for one source block, solving for the intermediate symbols directly.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of the symbol size, or if the solver returns
    /// no intermediate symbols.
    pub fn new(
        source_block_id: u8,
        config: &ObjectTransmissionInformation,
        data: &[u8],
    ) -> SourceBlockEncoder {
        let source_symbols = SourceBlockEncoder::create_symbols(config, data);

        let (intermediate_symbols, _) = gen_intermediate_symbols(
            &source_symbols,
            config.symbol_size() as usize,
            SPARSE_MATRIX_THRESHOLD,
        );

        SourceBlockEncoder {
            source_block_id,
            source_symbols,
            intermediate_symbols: intermediate_symbols.unwrap(),
        }
    }

    /// Creates an encoder for one source block by replaying a precomputed encoding plan.
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of the symbol size, or if the number of source
    /// symbols in `data` differs from the symbol count `plan` was generated for.
    pub fn with_encoding_plan(
        source_block_id: u8,
        config: &ObjectTransmissionInformation,
        data: &[u8],
        plan: &SourceBlockEncodingPlan,
    ) -> SourceBlockEncoder {
        let source_symbols = SourceBlockEncoder::create_symbols(config, data);
        // TODO: this could be more lenient and support anything with the same extended symbol count
        assert_eq!(source_symbols.len(), plan.source_symbol_count as usize);

        let intermediate_symbols = gen_intermediate_symbols_with_plan(
            &source_symbols,
            config.symbol_size() as usize,
            &plan.operations,
        );

        SourceBlockEncoder {
            source_block_id,
            source_symbols,
            intermediate_symbols,
        }
    }

    /// Returns one packet per source symbol, with encoding symbol ids `0, 1, 2, ...` in
    /// symbol order.
    pub fn source_packets(&self) -> Vec<EncodingPacket> {
        self.source_symbols
            .iter()
            .enumerate()
            .map(|(esi, symbol)| {
                EncodingPacket::new(
                    PayloadId::new(self.source_block_id, esi as u32),
                    symbol.as_bytes().to_vec(),
                )
            })
            .collect()
    }

    /// Returns `packets` consecutive repair packets, starting at repair symbol
    /// `start_repair_symbol_id`.
    ///
    /// The encoding symbol id placed in each packet is offset by the real number of source
    /// symbols, while the id fed to the tuple generator is offset by the extended (padded)
    /// number of source symbols.
    // See section 5.3.4
    pub fn repair_packets(&self, start_repair_symbol_id: u32, packets: u32) -> Vec<EncodingPacket> {
        let source_symbol_count = self.source_symbols.len() as u32;
        let start_encoding_symbol_id =
            start_repair_symbol_id + extended_source_block_symbols(source_symbol_count);
        let lt_symbols = num_lt_symbols(source_symbol_count);
        let sys_index = systematic_index(source_symbol_count);
        let p1 = calculate_p1(source_symbol_count);

        (0..packets)
            .map(|i| {
                let tuple =
                    intermediate_tuple(start_encoding_symbol_id + i, lt_symbols, sys_index, p1);
                EncodingPacket::new(
                    PayloadId::new(
                        self.source_block_id,
                        source_symbol_count + start_repair_symbol_id + i,
                    ),
                    enc(source_symbol_count, &self.intermediate_symbols, tuple).into_bytes(),
                )
            })
            .collect()
    }
}

/// Builds the symbol vector `D` that is fed to the intermediate symbol solver.
///
/// The layout is: `S + H` zero symbols, then a copy of every source symbol, then enough zero
/// symbols to pad the source block up to `extended_source_symbols` entries.
///
/// # Panics
///
/// Panics if the resulting length differs from the number of intermediate symbols for this
/// source block size.
#[allow(non_snake_case)]
fn create_d(
    source_block: &[Symbol],
    symbol_size: usize,
    extended_source_symbols: usize,
) -> Vec<Symbol> {
    let source_count = source_block.len() as u32;
    let L = num_intermediate_symbols(source_count);
    let S = num_ldpc_symbols(source_count);
    let H = num_hdpc_symbols(source_count);

    let mut D = Vec::with_capacity(L as usize);
    D.extend((0..(S + H)).map(|_| Symbol::zero(symbol_size)));
    D.extend(source_block.iter().cloned());
    // Extend the source block with padding. See section 5.3.2
    let padding = extended_source_symbols - source_block.len();
    D.extend((0..padding).map(|_| Symbol::zero(symbol_size)));
    assert_eq!(D.len(), L as usize);
    D
}

// See section 5.3.3.4
/// Solves for the intermediate symbols of `source_block`.
///
/// The constraint matrix is built as a `SparseBinaryMatrix` when the extended source symbol
/// count is at least `sparse_threshold`, and as a `DenseBinaryMatrix` otherwise. The return
/// value is whatever `fused_inverse_mul_symbols` yields: the optional intermediate symbols and
/// the optional list of symbol operations.
#[allow(non_snake_case)]
fn gen_intermediate_symbols(
    source_block: &[Symbol],
    symbol_size: usize,
    sparse_threshold: u32,
) -> (Option<Vec<Symbol>>, Option<Vec<SymbolOps>>) {
    let padded_count = extended_source_block_symbols(source_block.len() as u32);
    let D = create_d(source_block, symbol_size, padded_count as usize);
    let symbol_ids: Vec<u32> = (0..padded_count).collect();

    // The two branches differ only in the matrix type, which is a compile-time parameter.
    if padded_count < sparse_threshold {
        let (A, hdpc) = generate_constraint_matrix::<DenseBinaryMatrix>(padded_count, &symbol_ids);
        fused_inverse_mul_symbols(A, hdpc, D, padded_count)
    } else {
        let (A, hdpc) = generate_constraint_matrix::<SparseBinaryMatrix>(padded_count, &symbol_ids);
        fused_inverse_mul_symbols(A, hdpc, D, padded_count)
    }
}

/// Computes the intermediate symbols of `source_block` by applying every operation of
/// `operation_vector`, in order, to the padded symbol vector built by `create_d`.
#[allow(non_snake_case)]
fn gen_intermediate_symbols_with_plan(
    source_block: &[Symbol],
    symbol_size: usize,
    operation_vector: &[SymbolOps],
) -> Vec<Symbol> {
    let padded_count = extended_source_block_symbols(source_block.len() as u32) as usize;
    let mut D = create_d(source_block, symbol_size, padded_count);
    operation_vector.iter().for_each(|op| perform_op(op, &mut D));
    D
}

// Enc[] function, as defined in section 5.3.5.3
/// Generates one encoding symbol from the intermediate symbols.
///
/// `source_block_symbols` is the number of source symbols in the block, and `source_tuple`
/// is the `(d, a, b, d1, a1, b1)` tuple for the symbol being generated. The result is the sum
/// of `d` symbols taken from the first `W` intermediate symbols (stepping by `a` modulo `W`
/// from `b`) and `d1` symbols taken from the remaining `P` intermediate symbols (stepping by
/// `a1` modulo `P1` from `b1`, skipping positions that are not below `P`).
///
/// # Panics
///
/// Panics if the tuple is outside the ranges required by section 5.3.5.3: `a` in `[1, W)`,
/// `b` in `[0, W)`, `d1` equal to 2 or 3, `a1` in `[1, P1)` and `b1` in `[0, P1)`.
#[allow(clippy::many_single_char_names)]
fn enc(
    source_block_symbols: u32,
    intermediate_symbols: &[Symbol],
    source_tuple: (u32, u32, u32, u32, u32, u32),
) -> Symbol {
    let w = num_lt_symbols(source_block_symbols);
    let p = num_pi_symbols(source_block_symbols);
    let p1 = calculate_p1(source_block_symbols);
    let (d, a, mut b, d1, a1, mut b1) = source_tuple;

    assert!(1 <= a && a < w);
    assert!(b < w);
    assert!(d1 == 2 || d1 == 3);
    // The PI part of the tuple is bounded by P1, not W.
    assert!(1 <= a1 && a1 < p1);
    assert!(b1 < p1);

    let mut result = intermediate_symbols[b as usize].clone();
    for _ in 1..d {
        b = (b + a) % w;
        result += &intermediate_symbols[b as usize];
    }

    // Step until b1 addresses one of the P symbols that follow the first W.
    while b1 >= p {
        b1 = (b1 + a1) % p1;
    }

    result += &intermediate_symbols[(w + b1) as usize];

    for _ in 1..d1 {
        b1 = (b1 + a1) % p1;
        while b1 >= p {
            b1 = (b1 + a1) % p1;
        }
        result += &intermediate_symbols[(w + b1) as usize];
    }

    result
}

// Unit tests for the encoder; only compiled for test builds with the `std` feature enabled.
#[cfg(feature = "std")]
#[cfg(test)]
mod tests {
    use rand::Rng;
    use std::vec::Vec;

    use super::*;

    use crate::base::intermediate_tuple;
    use crate::symbol::Symbol;
    use crate::systematic_constants::num_lt_symbols;
    use crate::systematic_constants::num_pi_symbols;
    use crate::systematic_constants::{
        calculate_p1, num_ldpc_symbols, systematic_index, MAX_SOURCE_SYMBOLS_PER_BLOCK,
    };
    use crate::PayloadId;
    #[cfg(not(feature = "python"))]
    use crate::{Encoder, EncoderBuilder, EncodingPacket, ObjectTransmissionInformation};
    #[cfg(not(feature = "python"))]
    use std::collections::HashSet;

    // Size in bytes of every symbol produced by `gen_test_symbols`.
    const SYMBOL_SIZE: usize = 4;
    // Number of symbols produced by `gen_test_symbols`.
    const NUM_SYMBOLS: u32 = 100;

    // Returns `size` random bytes.
    fn gen_test_data(size: usize) -> Vec<u8> {
        let mut data: Vec<u8> = vec![0; size];
        for i in 0..size {
            data[i] = rand::thread_rng().gen();
        }
        data
    }

    // Returns a source block of NUM_SYMBOLS random symbols, each SYMBOL_SIZE bytes long.
    fn gen_test_symbols() -> Vec<Symbol> {
        let mut source_block: Vec<Symbol> = vec![];
        for _ in 0..NUM_SYMBOLS {
            let data = gen_test_data(SYMBOL_SIZE);
            source_block.push(Symbol::new(data));
        }
        source_block
    }

    // A threshold above the maximum block size is meant to force the dense matrix path.
    #[test]
    fn enc_constraint_dense() {
        enc_constraint(MAX_SOURCE_SYMBOLS_PER_BLOCK + 1);
    }

    // A threshold of zero always selects the sparse matrix path.
    #[test]
    fn enc_constraint_sparse() {
        enc_constraint(0);
    }

    // Checks that encoding symbol `i`, regenerated from the intermediate symbols, equals
    // source symbol `i` for every source symbol.
    fn enc_constraint(sparse_threshold: u32) {
        let source_symbols = gen_test_symbols();

        let (intermediate_symbols, _) =
            gen_intermediate_symbols(&source_symbols, SYMBOL_SIZE, sparse_threshold);
        let intermediate_symbols = intermediate_symbols.unwrap();

        let lt_symbols = num_lt_symbols(NUM_SYMBOLS);
        let sys_index = systematic_index(NUM_SYMBOLS);
        let p1 = calculate_p1(NUM_SYMBOLS);
        // See section 5.3.3.4.1, item 1.
        for i in 0..source_symbols.len() {
            let tuple = intermediate_tuple(i as u32, lt_symbols, sys_index, p1);
            let encoded = enc(NUM_SYMBOLS, &intermediate_symbols, tuple);
            assert_eq!(source_symbols[i], encoded);
        }
    }

    // A threshold above the maximum block size is meant to force the dense matrix path.
    #[test]
    fn ldpc_constraint_dense() {
        ldpc_constraint(MAX_SOURCE_SYMBOLS_PER_BLOCK + 1);
    }

    // A threshold of zero always selects the sparse matrix path.
    #[test]
    fn ldpc_constraint_sparse() {
        ldpc_constraint(0);
    }

    // Checks that the generated intermediate symbols satisfy the LDPC relations: every one of
    // the S accumulated symbols must end up as the zero symbol.
    #[allow(non_snake_case)]
    fn ldpc_constraint(sparse_threshold: u32) {
        let (intermediate_symbols, _) =
            gen_intermediate_symbols(&gen_test_symbols(), SYMBOL_SIZE, sparse_threshold);
        let C = intermediate_symbols.unwrap();
        let S = num_ldpc_symbols(NUM_SYMBOLS) as usize;
        let P = num_pi_symbols(NUM_SYMBOLS) as usize;
        let W = num_lt_symbols(NUM_SYMBOLS) as usize;
        let B = W - S;

        // See section 5.3.3.3
        // Start from the S symbols C[B..B + S].
        let mut D = vec![];
        for i in 0..S {
            D.push(C[B + i].clone());
        }

        // Add each of the first B symbols into three of the accumulators.
        for i in 0..B {
            let a = 1 + i / S;
            let b = i % S;
            D[b] += &C[i];

            let b = (b + a) % S;
            D[b] += &C[i];

            let b = (b + a) % S;
            D[b] += &C[i];
        }

        // Add two of the symbols that follow the first W into each accumulator.
        for i in 0..S {
            let a = i % P;
            let b = (i + 1) % P;
            D[i] += &C[W + a];
            D[i] += &C[W + b];
        }

        for i in 0..S {
            assert_eq!(Symbol::zero(SYMBOL_SIZE), D[i]);
        }
    }

    // Source packets must carry the input split into symbols with ids 0, 1, 2; repair packet
    // ids continue after the source symbols, offset by the requested start repair id.
    #[test]
    fn encoding_creates_expected_packets() {
        let symbol_size = 2;
        let data: [u8; 6] = [0, 1, 2, 3, 4, 5];
        let encoder = SourceBlockEncoder::new(
            0,
            &ObjectTransmissionInformation::new(0, symbol_size, 1, 1, 1),
            &data,
        );
        assert_eq!(
            encoder.source_packets(),
            [[0, 1], [2, 3], [4, 5]]
                .into_iter()
                .enumerate()
                .map(|(i, d)| EncodingPacket::new(PayloadId::new(0, i as u32), d.into()))
                .collect::<Vec<_>>()
        );
        assert_eq!(
            encoder
                .repair_packets(2, 4)
                .into_iter()
                .map(|p| p.payload_id.encoding_symbol_id())
                .collect::<Vec<_>>(),
            &[5, 6, 7, 8]
        );
    }

    // A builder with only the packet size set must produce the same encoder as
    // `Encoder::with_defaults` for that packet size.
    #[cfg(not(feature = "python"))]
    #[test]
    fn test_builder() {
        let data = vec![0, 1, 2, 3];
        let encoder = Encoder::with_defaults(&data, 1024);
        let mut builder = EncoderBuilder::new();
        builder.set_max_packet_size(1024);
        assert_eq!(builder.build(&data), encoder);
    }

    // Data that is an exact multiple of the packet size needs no padding.
    #[cfg(not(feature = "python"))]
    #[test]
    fn padding_constraint_exact() {
        let packet_size: u16 = 1024;
        let padding_size: usize = 0;
        let data_size: usize = packet_size as usize * 2 - padding_size;
        padding_constraint(packet_size, padding_size, data_size);
    }

    // Data that is 42 bytes short of two packets must be padded by 42 bytes.
    #[cfg(not(feature = "python"))]
    #[test]
    fn padding_constraint_42_bytes() {
        let packet_size: u16 = 1024;
        let padding_size: usize = 42;
        let data_size: usize = packet_size as usize * 2 - padding_size;
        padding_constraint(packet_size, padding_size, data_size);
    }

    // Concatenates the payloads of all source packets and checks that the result is the
    // original data followed by `padding_size` extra bytes.
    #[cfg(not(feature = "python"))]
    fn padding_constraint(packet_size: u16, padding_size: usize, data_size: usize) {
        let data = gen_test_data(data_size);
        let encoder = Encoder::with_defaults(&data, packet_size);

        // Fold step: appends the payload of `packet` to the accumulated bytes.
        fn accumulate_data(acc: Vec<u8>, packet: EncodingPacket) -> Vec<u8> {
            let mut updated_acc = acc;
            updated_acc.extend_from_slice(packet.data());
            updated_acc
        }

        let padded_data = encoder
            .get_block_encoders()
            .iter()
            .flat_map(|block| block.source_packets())
            .fold(vec![], accumulate_data);

        assert_eq!(data_size + padding_size, padded_data.len());
        assert_eq!(data[..], padded_data[..data_size]);
    }

    // With more than one source block, every block encoder must have a distinct block id.
    #[cfg(not(feature = "python"))]
    #[test]
    fn unique_blocks() {
        let data = gen_test_data(120);
        let config = ObjectTransmissionInformation::new(120, 10, 10, 0, 2);
        let encoder = Encoder::new(&data, config);
        assert!(encoder.get_block_encoders().len() > 1);
        let mut ids = HashSet::new();
        for block in encoder.get_block_encoders().iter() {
            ids.insert(block.source_block_id);
        }
        assert_eq!(ids.len(), encoder.get_block_encoders().len());
    }
}