1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
mod matrix;
pub use matrix::Matrix;
use rayon::prelude::*;
use std::cmp::{max, min};
use std::collections::HashMap;
const GAP: u16 = 2;
const MATCH: u16 = 0;
const MISMATCH: u16 = 1;
#[allow(unused_variables)]
#[pymodule]
fn seqalign(py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(par_compare))?;
m.add_wrapped(wrap_pyfunction!(compare_samples))?;
Ok(())
}
#[pyfunction]
pub fn compare_samples(s1: &str, s2: &str) -> PyResult<u16> {
let result = needleman_wunsch(s1, s2);
Ok(result)
}
#[pyfunction]
pub fn par_compare(v: Vec<(&str, &str)>, map: HashMap<&str, &str>, num_threads: &str) -> PyResult<Vec<(String, String, u16)>> {
std::env::set_var("RAYON_NUM_THREADS", num_threads);
let results =
v.par_iter()
.map(|x| {
(
(x.0).to_string(),
(x.1).to_string(),
needleman_wunsch(map[x.0], map[x.1]),
)
})
.collect();
Ok(results)
}
pub fn needleman_wunsch(x: &str, y: &str) -> u16 {
let (x_len, y_len) = (x.len(), y.len());
let mut matrix: Matrix<u16> = Matrix::new(x_len + 1, y_len + 1);
for i in 0..max(x_len + 1, y_len + 1) {
let value = GAP * i as u16;
if i < x_len + 1 {
matrix[(i, 0)] = value;
}
if i < y_len + 1 {
matrix[(0, i)] = value;
}
}
for (i, c1) in x.bytes().enumerate() {
for (j, c2) in y.bytes().enumerate() {
let fit = matrix[(i, j)] + check_match(c1, c2);
let delete = matrix[(i, j + 1)] + GAP;
let insert = matrix[(i + 1, j)] + GAP;
let min_val = min(min(fit, delete), min(fit, insert));
matrix[(i + 1, j + 1)] = min_val;
}
}
matrix[(x_len, y_len)]
}
fn check_match(b1: u8, b2: u8) -> u16 {
if b1 == b2 || b1 == b'N' || b2 == b'N' {
MATCH
} else {
MISMATCH
}
}