187. 重复的DNA序列
DNA序列 由一系列核苷酸组成,缩写为 'A'
, 'C'
, 'G'
和 'T'
.。
"ACGAATTCCG"
是一个 DNA序列 。在研究 DNA 时,识别 DNA 中的重复序列非常有用。
给定一个表示 DNA序列 的字符串 s
,返回所有在 DNA 分子中出现不止一次的 长度为 10
的序列(子字符串)。你可以按 任意顺序 返回答案。
示例 1:
输入:s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT" 输出:["AAAAACCCCC","CCCCCAAAAA"]
示例 2:
输入:s = "AAAAAAAAAAAAA" 输出:["AAAAAAAAAA"]
提示:
0 <= s.length <= 105
s[i]
==
'A'
、'C'
、'G'
or 'T'
原站题解
cpp 解法, 执行用时: 40 ms, 内存消耗: 15.6 MB, 提交时间: 2023-11-05 22:33:38
class Solution { const int L = 10; unordered_map<char, int> bin = {{'A', 0}, {'C', 1}, {'G', 2}, {'T', 3}}; public: vector<string> findRepeatedDnaSequences(string s) { vector<string> ans; int n = s.length(); if (n <= L) { return ans; } int x = 0; for (int i = 0; i < L - 1; ++i) { x = (x << 2) | bin[s[i]]; } unordered_map<int, int> cnt; for (int i = 0; i <= n - L; ++i) { x = ((x << 2) | bin[s[i + L - 1]]) & ((1 << (L * 2)) - 1); if (++cnt[x] == 2) { ans.push_back(s.substr(i, L)); } } return ans; } };
cpp 解法, 执行用时: 48 ms, 内存消耗: 23.1 MB, 提交时间: 2023-11-05 22:33:12
class Solution { const int L = 10; public: vector<string> findRepeatedDnaSequences(string s) { vector<string> ans; unordered_map<string, int> cnt; int n = s.length(); for (int i = 0; i <= n - L; ++i) { string sub = s.substr(i, L); if (++cnt[sub] == 2) { ans.push_back(sub); } } return ans; } };
javascript 解法, 执行用时: 76 ms, 内存消耗: 48.4 MB, 提交时间: 2022-12-14 11:57:51
/** * @param {string} s * @return {string[]} */ var findRepeatedDnaSequences = function(s) { const L = 10; const bin = new Map(); bin.set('A', 0); bin.set('C', 1); bin.set('G', 2); bin.set('T', 3); const ans = []; const n = s.length; if (n <= L) { return ans; } let x = 0; for (let i = 0; i < L - 1; ++i) { x = (x << 2) | bin.get(s[i]); } const cnt = new Map(); for (let i = 0; i <= n - L; ++i) { x = ((x << 2) | bin.get(s[i + L - 1])) & ((1 << (L * 2)) - 1); cnt.set(x, (cnt.get(x) || 0) + 1); if (cnt.get(x) === 2) { ans.push(s.slice(i, i + L)); } } return ans; };
golang 解法, 执行用时: 16 ms, 内存消耗: 8.8 MB, 提交时间: 2022-12-14 11:57:37
const L = 10 var bin = map[byte]int{'A': 0, 'C': 1, 'G': 2, 'T': 3} func findRepeatedDnaSequences(s string) (ans []string) { n := len(s) if n <= L { return } x := 0 for _, ch := range s[:L-1] { x = x<<2 | bin[byte(ch)] } cnt := map[int]int{} for i := 0; i <= n-L; i++ { x = (x<<2 | bin[s[i+L-1]]) & (1<<(L*2) - 1) cnt[x]++ if cnt[x] == 2 { ans = append(ans, s[i:i+L]) } } return ans }
java 解法, 执行用时: 27 ms, 内存消耗: 46.9 MB, 提交时间: 2022-12-14 11:57:23
class Solution { static final int L = 10; Map<Character, Integer> bin = new HashMap<Character, Integer>() {{ put('A', 0); put('C', 1); put('G', 2); put('T', 3); }}; public List<String> findRepeatedDnaSequences(String s) { List<String> ans = new ArrayList<String>(); int n = s.length(); if (n <= L) { return ans; } int x = 0; for (int i = 0; i < L - 1; ++i) { x = (x << 2) | bin.get(s.charAt(i)); } Map<Integer, Integer> cnt = new HashMap<Integer, Integer>(); for (int i = 0; i <= n - L; ++i) { x = ((x << 2) | bin.get(s.charAt(i + L - 1))) & ((1 << (L * 2)) - 1); cnt.put(x, cnt.getOrDefault(x, 0) + 1); if (cnt.get(x) == 2) { ans.add(s.substring(i, i + L)); } } return ans; } }
python3 解法, 执行用时: 92 ms, 内存消耗: 25.4 MB, 提交时间: 2022-12-14 11:57:10
''' 哈希表+滑动窗口+位运算 ''' L = 10 bin = {'A': 0, 'C': 1, 'G': 2, 'T': 3} class Solution: def findRepeatedDnaSequences(self, s: str) -> List[str]: n = len(s) if n <= L: return [] ans = [] x = 0 for ch in s[:L - 1]: x = (x << 2) | bin[ch] cnt = defaultdict(int) for i in range(n - L + 1): x = ((x << 2) | bin[s[i + L - 1]]) & ((1 << (L * 2)) - 1) cnt[x] += 1 if cnt[x] == 2: ans.append(s[i : i + L]) return ans
javascript 解法, 执行用时: 92 ms, 内存消耗: 54.2 MB, 提交时间: 2022-12-14 11:56:37
/** * @param {string} s * @return {string[]} */ var findRepeatedDnaSequences = function(s) { const L = 10; const ans = []; const cnt = new Map(); const n = s.length; for (let i = 0; i <= n - L; ++i) { const sub = s.slice(i, i + L) cnt.set(sub, (cnt.get(sub) || 0) + 1); if (cnt.get(sub) === 2) { ans.push(sub); } } return ans; };
golang 解法, 执行用时: 16 ms, 内存消耗: 9.2 MB, 提交时间: 2022-12-14 11:56:25
const L = 10 func findRepeatedDnaSequences(s string) (ans []string) { cnt := map[string]int{} for i := 0; i <= len(s)-L; i++ { sub := s[i : i+L] cnt[sub]++ if cnt[sub] == 2 { ans = append(ans, sub) } } return }
java 解法, 执行用时: 21 ms, 内存消耗: 49.9 MB, 提交时间: 2022-12-14 11:56:14
class Solution { static final int L = 10; public List<String> findRepeatedDnaSequences(String s) { List<String> ans = new ArrayList<String>(); Map<String, Integer> cnt = new HashMap<String, Integer>(); int n = s.length(); for (int i = 0; i <= n - L; ++i) { String sub = s.substring(i, i + L); cnt.put(sub, cnt.getOrDefault(sub, 0) + 1); if (cnt.get(sub) == 2) { ans.add(sub); } } return ans; } }
python3 解法, 执行用时: 56 ms, 内存消耗: 27.9 MB, 提交时间: 2022-12-14 11:56:00
''' 哈希表 我们可以用一个哈希表统计 s 所有长度为 10 的子串的出现次数,返回所有出现次数超过 1 的子串。 代码实现时,可以一边遍历子串一边记录答案,为了不重复记录答案,我们只统计当前出现次数为 2 的子串。 ''' L = 10 class Solution: def findRepeatedDnaSequences(self, s: str) -> List[str]: ans = [] cnt = defaultdict(int) for i in range(len(s) - L + 1): sub = s[i : i + L] cnt[sub] += 1 if cnt[sub] == 2: ans.append(sub) return ans