列表

详情


NC20453. [TJOI2017]DNA

描述

加里敦大学的生物研究所,发现了决定人喜不喜欢吃藕的基因序列S,有这个序列的碱基序列就会表现出喜欢吃藕的性状,但是研究人员发现对碱基序列S,任意修改其中不超过3个碱基,依然能够表现出吃藕的性状。现在研究人员想知道这个基因在DNA链S0上的位置。所以你需要统计在一个表现出吃藕性状的人的DNA序列S0上,有多少个连续子串可能是该基因,即有多少个S0的连续子串修改小于等于三个字母能够变成S。

输入描述

第一行有一个数T,表示有几组数据 
每组数据第一行一个长度不超过10^5的碱基序列S0
每组数据第二行一个长度不超过10^5的吃藕基因序列S

输出描述

共T行,第i行表示第i组数据中,在S0中有多少个与S等长的连续子串可能是表现吃藕性状的碱基序列

示例1

输入:

1
ATCGCCCTA
CTTCA

输出:

2

原站题解

上次编辑到这里,代码来自缓存 点击恢复默认模板

C++14(g++5.4) 解法, 执行用时: 466ms, 内存消耗: 21368K, 提交时间: 2020-09-03 18:36:30

#include<iostream>
#include<algorithm>
using namespace std;
#define re register
const int max_n = 2e5 + 100;
int ranks[max_n], SA[max_n], height[max_n];
int wa[max_n], wb[max_n], wvarr[max_n], wsarr[max_n];
inline int cmp(int* r, int a, int b, int l) {
	return r[a] == r[b] && r[a + l] == r[b + l];
}
inline void get_sa(int* r, int* sa, int n, int m) {
	++n;
	re int i, j, p, * x = wa, * y = wb, * t;
	for (i = 0; i < m; ++i) wsarr[i] = 0;
	for (i = 0; i < n; ++i) wsarr[x[i] = r[i]]++;
	for (i = 1; i < m; ++i) wsarr[i] += wsarr[i - 1];
	for (i = n - 1; i >= 0; --i) sa[--wsarr[x[i]]] = i;
	for (j = 1, p = 1; p < n; j <<= 1, m = p) {
		for (p = 0, i = n - j; i < n; ++i) y[p++] = i;
		for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
		for (i = 0; i < n; ++i) wvarr[i] = x[y[i]];
		for (i = 0; i < m; ++i) wsarr[i] = 0;
		for (i = 0; i < n; ++i) wsarr[wvarr[i]]++;
		for (i = 1; i < m; ++i) wsarr[i] += wsarr[i - 1];
		for (i = n - 1; i >= 0; --i) sa[--wsarr[wvarr[i]]] = y[i];
		for (t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; ++i)
			x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
	}
}
void get_height(int* r, int* sa, int n) {
	re int i, j, k = 0;
	for (i = 0; i <= n; ++i) ranks[sa[i]] = i;
	for (i = 0; i < n; height[ranks[i++]] = k)
		for (k ? k-- : 0, j = sa[ranks[i] - 1]; r[i + k] == r[j + k]; k++);
	return;
}

int st[max_n][32];
void initSt(int a[], int n) {
	for (int i = 0;i <= n;++i)st[i][0] = height[i];
	int mxk = (int)log2(n + 1);
	for (int k = 1;k <= mxk;++k) {
		for (int i = 0;i <= n;++i) {
			if (i + (1 << k) - 1 > n)break;
			st[i][k] = min(st[i][k - 1], st[i + (1 << (k - 1))][k - 1]);
		}
	}
}
int que(int l, int r) {
	l = ranks[l];r = ranks[r];
	if (l > r)swap(l, r);
	++l;
	int len = log2(r - l + 1);
	return min(st[l][len], st[r - (1 << len) + 1][len]);
}

int a[max_n];
string s1, s2, s3;
void init(int n) {
	fill(a, a + n + 3, 0);
	fill(ranks, ranks + n + 3, 0);
	fill(SA, SA + n + 3, 0);
	fill(height, height + n + 3, 0);
	fill(wa, wa + n + 3, 0);
	fill(wb, wb + n + 3, 0);
	fill(wsarr, wsarr + n + 3, 0);
	fill(wvarr, wvarr + n + 3, 0);
}
int ToInt(char ch) {
	if (ch == 'A')return 1;
	else if (ch == 'T')return 2;
	else if (ch == 'C')return 3;
	else if (ch == 'G')return 4;
	else return 5;
}
int main() {
	ios::sync_with_stdio(0);
	int T;cin >> T;
	while (T--) {
		cin >> s1;cin >> s2;
		s3 = s1 + s2;
		int n = s3.size();
		init(n);
		for (re int i = 0;i < n;++i)a[i] = ToInt(s3[i]);
		get_sa(a, SA, n, 7);
		get_height(a, SA, n);
		initSt(a, n);
		int ans = 0;
		for (re int i = s2.size();i <= s1.size();++i) {
			int left = i - s2.size();
			int cur = left;
			for (int k = 0;k <= 3;++k) {
				cur += que(cur, s1.size() + cur - left);
				if (k < 3)++cur;
				if (cur >= i) {
					++ans;
					break;
				}
			}
		}cout << ans << endl;
	}
}

C++(clang++11) 解法, 执行用时: 82ms, 内存消耗: 3836K, 提交时间: 2021-02-13 15:33:10

#include<stdio.h>
#include<string.h>
#include<algorithm>
using namespace std;
#define N 100050
#define LL unsigned long long
int T,l1,l2;
char s1[N],s2[N];
LL mi[N],p=19260817,h1[N],h2[N];
bool check(int x)
{
	int i=x,j=1;
	int l=1,r=l2+1,mid;
	for(int k=1;k<=3;++k)
	{
		while(l<r)
		{
			mid=(l+r)>>1;
			if(h1[i+mid-1]-h1[i-1]*mi[mid]==h2[j+mid-1]-h2[j-1]*mi[mid])
			l=mid+1;
			else
			r=mid;
		}
		i=i+l;
		j=j+l;
		l=1;
		r=l2-j+2;
		if(j>l2)
		return 1;
		mid=l2-j+1;
		if(h1[i+mid-1]-h1[i-1]*mi[mid]==h2[j+mid-1]-h2[j-1]*mi[mid])
		return 1;
	}
	return 0;
}
int main()
{
	scanf("%d",&T);
	while(T--)
	{
		int i,ans=0;
		scanf("%s%s",s1+1,s2+1);
		l1=strlen(s1+1);
		l2=strlen(s2+1);
		if(l1<l2)
		{
			puts("0");
			continue;
		}
		mi[0]=1;
		for(i=1;i<=l1;++i)
		{
			mi[i]=mi[i-1]*p;
			h1[i]=h1[i-1]*p+s1[i]; 
		}
		for(i=1;i<=l2;++i)
		h2[i]=h2[i-1]*p+s2[i];
		for(i=1;i<=l1-l2+1;++i)
		{
			if(check(i))
		     ans++;
		}
		printf("%d\n",ans);
	}
}

上一题