mirror of
https://github.com/cberner/raptorq.git
synced 2024-06-27 09:19:02 +00:00
Optimize HDPC generation with recursive calculation
Improves performance on large symbol counts by > 10%
This commit is contained in:
parent
f36bf73ca9
commit
eb07e23208
@ -2,7 +2,6 @@ use crate::base::intermediate_tuple;
|
||||
use crate::matrix::BinaryMatrix;
|
||||
use crate::octet::Octet;
|
||||
use crate::octet_matrix::DenseOctetMatrix;
|
||||
use crate::octets::{add_assign, fused_addassign_mul_scalar};
|
||||
use crate::rng::rand;
|
||||
use crate::systematic_constants::extended_source_block_symbols;
|
||||
use crate::systematic_constants::num_hdpc_symbols;
|
||||
@ -59,61 +58,41 @@ pub fn enc_indices(
|
||||
#[allow(non_snake_case)]
|
||||
fn generate_hdpc_rows(Kprime: usize, S: usize, H: usize) -> DenseOctetMatrix {
|
||||
let mut matrix = DenseOctetMatrix::new(H, Kprime + S + H, 0);
|
||||
// G_HDPC
|
||||
// Compute G_HDPC using recursive formulation, since this is much faster than a
|
||||
// naive matrix multiplication approach
|
||||
|
||||
// Generates the MT matrix
|
||||
// See section 5.3.3.3
|
||||
let mut mt: Vec<Vec<u8>> = vec![vec![0; Kprime + S]; H];
|
||||
let mut result: Vec<Vec<u8>> = vec![vec![0; Kprime + S]; H];
|
||||
// Initialize the last column to alpha^i, which comes from multiplying the last column of MT
|
||||
// with the lower right 1 in GAMMA
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for i in 0..H {
|
||||
mt[i][Kprime + S - 1] = Octet::alpha(i).byte();
|
||||
result[i][Kprime + S - 1] = Octet::alpha(i).byte();
|
||||
}
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..=(Kprime + S - 2) {
|
||||
|
||||
// Now compute the rest of G_HDPC.
|
||||
// Note that for each row in GAMMA, i'th col = alpha * (i + 1)'th col
|
||||
// Therefore we can compute this right to left, by multiplying by alpha each time, and adding
|
||||
// the Rand() entries which will be associatively handled
|
||||
for j in (0..=(Kprime + S - 2)).rev() {
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for i in 0..H {
|
||||
result[i][j] = (Octet::alpha(1) * Octet::new(result[i][j + 1])).byte();
|
||||
}
|
||||
let rand6 = rand((j + 1) as u32, 6u32, H as u32) as usize;
|
||||
let rand7 = rand((j + 1) as u32, 7u32, (H - 1) as u32) as usize;
|
||||
let i1 = rand6;
|
||||
let i2 = (rand6 + rand7 + 1) % H;
|
||||
mt[i1][j] = 1;
|
||||
mt[i2][j] = 1;
|
||||
}
|
||||
// Multiply by the GAMMA matrix
|
||||
// See section 5.3.3.3
|
||||
let mut gamma_row = vec![0; Kprime + S];
|
||||
// We only create the last row of the GAMMA matrix, as all preceding rows are just a shift left
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..(Kprime + S) {
|
||||
// The spec says "alpha ^^ (i-j)". However, this clearly can overflow since alpha() is
|
||||
// only defined up to input < 256. Since alpha() only has 255 unique values, we must
|
||||
// take the input mod 255. Without this the constraint matrix ends up being singular
|
||||
// for 1698 and 8837 source symbols.
|
||||
gamma_row[j] = Octet::alpha((Kprime + S - 1 - j) % 255).byte();
|
||||
result[i1][j] ^= Octet::one().byte();
|
||||
result[i2][j] ^= Octet::one().byte();
|
||||
}
|
||||
|
||||
// Copy G_HDPC into matrix
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for i in 0..H {
|
||||
let mut result_row = vec![0; Kprime + S];
|
||||
for j in 0..(Kprime + S) {
|
||||
let scalar = Octet::new(mt[i][j]);
|
||||
if scalar == Octet::zero() {
|
||||
continue;
|
||||
}
|
||||
if scalar == Octet::one() {
|
||||
add_assign(
|
||||
&mut result_row[0..=j],
|
||||
&gamma_row[(Kprime + S - j - 1)..(Kprime + S)],
|
||||
);
|
||||
} else {
|
||||
fused_addassign_mul_scalar(
|
||||
&mut result_row[0..=j],
|
||||
&gamma_row[(Kprime + S - j - 1)..(Kprime + S)],
|
||||
&scalar,
|
||||
);
|
||||
}
|
||||
}
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..(Kprime + S) {
|
||||
if result_row[j] != 0 {
|
||||
matrix.set(i, j, Octet::new(result_row[j]));
|
||||
if result[i][j] != 0 {
|
||||
matrix.set(i, j, Octet::new(result[i][j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -188,3 +167,111 @@ pub fn generate_constraint_matrix<T: BinaryMatrix>(
|
||||
|
||||
(matrix, generate_hdpc_rows(Kprime, S, H))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::constraint_matrix::generate_hdpc_rows;
|
||||
use crate::octet_matrix::DenseOctetMatrix;
|
||||
use crate::octets::{add_assign, fused_addassign_mul_scalar};
|
||||
use crate::rng::rand;
|
||||
use crate::systematic_constants::{num_hdpc_symbols, num_ldpc_symbols};
|
||||
use crate::{extended_source_block_symbols, Octet};
|
||||
use rand::Rng;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
fn reference_generate_hdpc_rows(Kprime: usize, S: usize, H: usize) -> DenseOctetMatrix {
|
||||
let mut matrix = DenseOctetMatrix::new(H, Kprime + S + H, 0);
|
||||
// G_HDPC
|
||||
|
||||
// Generates the MT matrix
|
||||
// See section 5.3.3.3
|
||||
let mut mt: Vec<Vec<u8>> = vec![vec![0; Kprime + S]; H];
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for i in 0..H {
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..=(Kprime + S - 2) {
|
||||
let rand6 = rand((j + 1) as u32, 6u32, H as u32) as usize;
|
||||
let rand7 = rand((j + 1) as u32, 7u32, (H - 1) as u32) as usize;
|
||||
if i == rand6 || i == (rand6 + rand7 + 1) % H {
|
||||
mt[i][j] = 1;
|
||||
}
|
||||
}
|
||||
mt[i][Kprime + S - 1] = Octet::alpha(i).byte();
|
||||
}
|
||||
// Multiply by the GAMMA matrix
|
||||
// See section 5.3.3.3
|
||||
let mut gamma_row = vec![0; Kprime + S];
|
||||
// We only create the last row of the GAMMA matrix, as all preceding rows are just a shift left
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..(Kprime + S) {
|
||||
// The spec says "alpha ^^ (i-j)". However, this clearly can overflow since alpha() is
|
||||
// only defined up to input < 256. Since alpha() only has 255 unique values, we must
|
||||
// take the input mod 255. Without this the constraint matrix ends up being singular
|
||||
// for 1698 and 8837 source symbols.
|
||||
gamma_row[j] = Octet::alpha((Kprime + S - 1 - j) % 255).byte();
|
||||
}
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for i in 0..H {
|
||||
let mut result_row = vec![0; Kprime + S];
|
||||
for j in 0..(Kprime + S) {
|
||||
let scalar = Octet::new(mt[i][j]);
|
||||
if scalar == Octet::zero() {
|
||||
continue;
|
||||
}
|
||||
if scalar == Octet::one() {
|
||||
add_assign(
|
||||
&mut result_row[0..=j],
|
||||
&gamma_row[(Kprime + S - j - 1)..(Kprime + S)],
|
||||
);
|
||||
} else {
|
||||
fused_addassign_mul_scalar(
|
||||
&mut result_row[0..=j],
|
||||
&gamma_row[(Kprime + S - j - 1)..(Kprime + S)],
|
||||
&scalar,
|
||||
);
|
||||
}
|
||||
}
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for j in 0..(Kprime + S) {
|
||||
if result_row[j] != 0 {
|
||||
matrix.set(i, j, Octet::new(result_row[j]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I_H
|
||||
for i in 0..H {
|
||||
matrix.set(i, i + (Kprime + S) as usize, Octet::one());
|
||||
}
|
||||
|
||||
matrix
|
||||
}
|
||||
|
||||
fn assert_matrices_eq(matrix1: &DenseOctetMatrix, matrix2: &DenseOctetMatrix) {
|
||||
assert_eq!(matrix1.height(), matrix2.height());
|
||||
assert_eq!(matrix1.width(), matrix2.width());
|
||||
for i in 0..matrix1.height() {
|
||||
for j in 0..matrix1.width() {
|
||||
assert_eq!(
|
||||
matrix1.get(i, j),
|
||||
matrix2.get(i, j),
|
||||
"Matrices are not equal at row={} col={}",
|
||||
i,
|
||||
j
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn fast_hdpc() {
|
||||
let source_block_symbols = rand::thread_rng().gen_range(1..50000);
|
||||
let Kprime = extended_source_block_symbols(source_block_symbols) as usize;
|
||||
let S = num_ldpc_symbols(source_block_symbols) as usize;
|
||||
let H = num_hdpc_symbols(source_block_symbols) as usize;
|
||||
let expected = reference_generate_hdpc_rows(Kprime, S, H);
|
||||
let generated = generate_hdpc_rows(Kprime, S, H);
|
||||
assert_matrices_eq(&expected, &generated);
|
||||
}
|
||||
}
|
||||
|
@ -47,6 +47,11 @@ impl DenseOctetMatrix {
|
||||
self.height
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn width(&self) -> usize {
|
||||
self.width
|
||||
}
|
||||
|
||||
#[cfg(feature = "benchmarking")]
|
||||
pub fn size_in_bytes(&self) -> usize {
|
||||
let mut bytes = size_of::<Self>();
|
||||
|
Loading…
Reference in New Issue
Block a user