Skip to content

Commit f7ba0b7

Browse files
committed
add problem of dna search
1 parent dc055c6 commit f7ba0b7

File tree

3 files changed

+57
-0
lines changed

3 files changed

+57
-0
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,7 @@ Collections.binarySearch(arrayList, 3); // for list
943943

944944
- Find k-th smallest/largest element in an array (`FindKthSmallestElement`/`FindKthLargestElement`), EPI#11.8: [c++](cpp-algorithm/src/search) | Find the k-th smallest/largest element in an array using the quickselect algorithm (`QuickSelectAlgorithm`).
945945
- Find the minimum and maximum elements in an array (`FindMinMax`), EPI#11.7: [c++](cpp-algorithm/src/search)
946+
- Search for a codon (a combination of three nucleotides) in a gene (`linear_contains`, `binary_contains`), CCSP#2.1: [python](python-algorithm/src/search) | Search for a codon (a combination of three nucleotides) in a gene using linear search and binary search.
946947
- Search a sorted array for entry equal to its index (`SearchEntryEqualToItsIndex`), EPI#11.2: [c++](cpp-algorithm/src/search)
947948
- Search a sorted array for the first greater than a key (`SearchFirstGreaterThanKey`): [c++](cpp-algorithm/src/search)
948949
- Search a sorted array for the first occurrence of a key (`SearchFirstOfKey`), EPI#11.1: [c++](cpp-algorithm/src/search)
@@ -1153,6 +1154,7 @@ var str = collection.stream()
11531154
- Discrete Mathematics and Its Applications, 8th Edition, by Kenneth H. Rosen
11541155
- Cracking the Coding Interview, 6th Edition, by Gayle Laakmann McDowell
11551156
- Elements of Programming Interviews, 2nd Edition, by Adnan Aziz, Tsung-Hsien Lee and Amit Prakash
1157+
- Classic Computer Science Problems in Python, by David Kopec
11561158
11571159
[:arrow_up_small: back to toc](#table-of-contents)
11581160
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from enum import IntEnum
2+
from typing import Tuple, List
3+
4+
class Nucleotide(IntEnum):
    """The four nucleotide symbols, numbered 1-4 so they compare and sort as ints."""

    A = 1
    C = 2
    G = 3
    T = 4


# A codon is an ordered triple of nucleotides; tuples compare element by element.
Codon = Tuple[Nucleotide, Nucleotide, Nucleotide]
# A gene is a list of codons.
Gene = List[Codon]
7+
8+
9+
def string_to_gene(s: str) -> Gene:
    """Convert a string of nucleotide letters into a list of codons.

    The string is read three characters at a time. A trailing group of
    fewer than three characters is ignored. Each character is looked up by
    name in ``Nucleotide``, so a character that is not a member name raises
    ``KeyError``.
    """
    codons: Gene = []
    # Stopping at len(s) - 2 guarantees every start index has two successors.
    for start in range(0, len(s) - 2, 3):
        first = Nucleotide[s[start]]
        second = Nucleotide[s[start + 1]]
        third = Nucleotide[s[start + 2]]
        codons.append((first, second, third))
    return codons
14+
15+
16+
def linear_contains(gene: Gene, key_codon: Codon) -> bool:
    """Return True if any codon in ``gene`` equals ``key_codon``.

    Codons are checked front to back and the scan stops at the first match.
    An empty gene yields False.
    """
    for candidate in gene:
        if candidate == key_codon:
            return True
    return False
18+
19+
20+
def binary_contains(gene: Gene, key_codon: Codon) -> bool:
    """Return True if ``key_codon`` is found in ``gene`` by binary search.

    ``gene`` must already be sorted in ascending order; on unsorted input
    the result is not meaningful. An empty gene yields False.
    """
    lo, hi = 0, len(gene) - 1
    # Invariant: if the key is present, it lies within gene[lo..hi] inclusive.
    while lo <= hi:
        middle = (lo + hi) // 2
        candidate = gene[middle]
        if candidate < key_codon:
            # Key can only be in the upper half.
            lo = middle + 1
            continue
        if candidate > key_codon:
            # Key can only be in the lower half.
            hi = middle - 1
            continue
        return True
    return False
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import pytest
2+
from algorithm.search.dna_search import Nucleotide, string_to_gene, linear_contains, binary_contains
3+
4+
5+
@pytest.mark.benchmark(group="linear_contains")
@pytest.mark.parametrize("gene_str, key_codon, expected", [
    ("ACGTGGCTCTCTAACGTACGTACGTACGGGGTTTATATATACCCTAGGACTCCCTTT", (Nucleotide.A, Nucleotide.C, Nucleotide.G), True),
    ("ACGTGGCTCTCTAACGTACGTACGTACGGGGTTTATATATACCCTAGGACTCCCTTT", (Nucleotide.G, Nucleotide.A, Nucleotide.T), False),
], ids=["successful", "failed"])
def test_linear_contains(benchmark, gene_str, key_codon, expected):
    """Run ``linear_contains`` through the ``benchmark`` fixture and check its result.

    The parametrize argnames must match this function's parameter names
    (``gene_str``, not ``gene``); otherwise pytest rejects the test at
    collection time.
    """
    gene = string_to_gene(gene_str)
    result = benchmark(linear_contains, gene, key_codon)
    assert expected == result
14+
15+
16+
@pytest.mark.benchmark(group="binary_contains")
@pytest.mark.parametrize("gene_str, key_codon, expected", [
    ("ACGTGGCTCTCTAACGTACGTACGTACGGGGTTTATATATACCCTAGGACTCCCTTT", (Nucleotide.A, Nucleotide.C, Nucleotide.G), True),
    ("ACGTGGCTCTCTAACGTACGTACGTACGGGGTTTATATATACCCTAGGACTCCCTTT", (Nucleotide.G, Nucleotide.A, Nucleotide.T), False),
], ids=["successful", "failed"])
def test_binary_contains(benchmark, gene_str, key_codon, expected):
    """Run ``binary_contains`` through the ``benchmark`` fixture and check its result.

    The parametrize argnames must match this function's parameter names
    (``gene_str``, not ``gene``); otherwise pytest rejects the test at
    collection time.
    """
    # Binary search needs ascending order, so the gene is sorted before the search.
    sorted_gene = sorted(string_to_gene(gene_str))
    result = benchmark(binary_contains, sorted_gene, key_codon)
    assert expected == result

0 commit comments

Comments
 (0)