Archive for the ‘String Processing’ Category

UVA – 11732 – “strcmp()” Anyone?   Leave a comment


At first I thought this problem can be solved using trie, which would be an easy way however the simple trie would use a lot of memory 4000 * 1000 * 26 integers or so, Then I had to use another way which would be sorting, using any comparison based sorting algorithm as quick sort or merge sort would also cause Time limit exceeded so instead I used my not-so-efficient radix sort which runs in O(num_words*max_length), then using the sorted words you can easily calculate the result, see the solution for more information.

#include <stdio.h>
#include <string.h>
#include <stack>
using namespace std;
typedef long long ll;
char words[4005][1005];
int len[4005];
int loc[4005];
int temp[4005];
int word_count;
int freq[128];

int charAt(int i, int j) {
	return (j < len[loc[i]]) ? words[loc[i]][j] : 0;
}
void count_sort(int j) {
	memset(freq, 0, sizeof freq);
	for (int i = 0; i < word_count; i++)
		freq[charAt(i, j)]++;
	for (int i = 1; i < 128; i++)
		freq[i] += freq[i - 1];
	for (int i = word_count - 1; i >= 0; i--)
		temp[--freq[charAt(i, j)]] = loc[i];
	memcpy(loc, temp, word_count * 4);
}
void radix_sort() {
	int max_len = 0;
	for (int i = 0; i < word_count; i++) {
		loc[i] = i;
		if (len[i] > max_len)
			max_len = len[i];
	}
	for (int i = max_len - 1; i >= 0; i--)
		count_sort(i);
}
int main(int argc, char *argv[]) {
	int cc = 1;
	while (scanf("%d", &word_count) && word_count) {
		getchar();
		for (int i = 0; i < word_count; i++) {
			gets(words[i]);
			len[i] = strlen(words[i]);
		}
		radix_sort();
		stack<int> s;
		s.push(0);
		s.push(word_count - 1);
		s.push(0);
		ll res = 0;
		memset(freq, 0, sizeof(freq));
		while (s.size()) {
			int j = s.top();
			s.pop();
			int r = s.top();
			s.pop();
			int l = s.top();
			s.pop();
			int all = r - l + 1;
			for (int i = l; i <= r; i++)
				freq[charAt(i, j)]++;
			res += freq[0] * (freq[0] - 1);
			res += freq[0] * (all - freq[0]);
			freq[0] = 0;
			for (int i = l; i <= r; i++) {
				char c = charAt(i, j);
				if (!c)
					continue;
				if ((i > l && c == charAt(i - 1, j)) || !c)
					continue;
				res += freq[c] * (freq[c] - 1);
				res += freq[c] * (all - freq[c] - i + l);
				s.push(i);
				s.push(i + freq[c] - 1);
				s.push(j + 1);
				freq[c] = 0;
			}
		}
		printf("Case %d: %lld\n", cc++, res);
	}
	return 0;
}
Advertisements

Posted May 7, 2013 by epicrado in Combinatorics, String Processing

UVa – 11468 – Substring   Leave a comment


Dynamic Programming + Aho Corasik String matching

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Queue;
import java.util.StringTokenizer;

public class UVa11468 {
	class AhoCorasik {
		Node root;

		class Node {
			Node next[];
			Node fail;
			boolean flag;// true iff current prefix matched ANY string in the
							// set
			int label;

			public Node() {
				next = new Node[128];
			}
		}

		AhoCorasik(char[][] patterns) {
			root = new Node();
			for (int i = 0; i < patterns.length; i++)
				TrieInsert(patterns[i]);
			Queue<Node> Q = new ArrayDeque<Node>();
			Queue<Character> Q2 = new ArrayDeque<Character>();
			root.fail = root;
			Q.add(root);
			while (!Q.isEmpty()) {
				Node current = Q.poll();
				for (int i = 0; i < current.next.length; i++)
					if (current.next[i] != null) {
						Q.add(current.next[i]);
						Q.add(current);
						Q2.add((char) i);
					}
				if (current == root)
					continue;
				Node parent = Q.poll();
				Character last = Q2.poll();
				Node cfail = parent.fail;
				while (cfail != root && cfail.next[last] == null)
					cfail = cfail.fail;
				current.fail = cfail.next[last];
				if (current.fail == null || current.fail == current)
					current.fail = root;
				current.flag = current.flag || current.fail.flag;
			}
		}

		void TrieInsert(char[] pattern) {
			Node current = root;
			for (int i = 0; i < pattern.length; i++) {
				if (current.next[pattern[i]] == null)
					current.next[pattern[i]] = new Node();
				current = current.next[pattern[i]];
			}
			current.flag = true;
		}

		Node next(Node current, char c) {
			while (current.next[c] == null && current != root)
				current = current.fail;
			return current.next[c] == null ? root : current.next[c];
		}
	}

	double[][] dp = new double[105][20 * 25];
	AhoCorasik.Node[] nodes = new AhoCorasik.Node[20 * 25];
	double[] prob = new double[128];
	char[] chr = new char[128];
	int all;
	AhoCorasik tree;

	public double f(int l, int state) {
		if (nodes[state].flag)
			return 0.0;
		else if (l == 0)
			return 1;
		else if (dp[l][state] != -1)
			return dp[l][state];
		else {
			double res = 0;
			for (int i = 0; i < all; i++)
				res += prob[i]
						* f(l - 1, tree.next(nodes[state], chr[i]).label);
			return dp[l][state] = res;
		}
	}

	public void run() throws Exception {
		InputReader in = new InputReader(System.in);
		int tc = in.nextInt();
		int cc = 1;
		while (tc-- > 0) {
			int patterns = in.nextInt();
			char[][] pat = new char[patterns][];
			for (int i = 0; i < pat.length; i++)
				pat[i] = in.next().toCharArray();
			tree = new AhoCorasik(pat);
			Queue<AhoCorasik.Node> Q = new ArrayDeque<AhoCorasik.Node>();
			Q.add(tree.root);
			int top = 0;
			while (!Q.isEmpty()) {
				AhoCorasik.Node cur = Q.poll();
				cur.label = top;
				nodes[top] = cur;
				top++;
				for (int j = 0; j < cur.next.length; j++)
					if (cur.next[j] != null)
						Q.add(cur.next[j]);
			}
			top = 0;
			all = in.nextInt();
			for (top = 0; top < all; top++) {
				char c = in.next().charAt(0);
				double d = Double.parseDouble(in.next());
				chr[top] = c;
				prob[top] = d;
			}
			int l = in.nextInt();
			for (int i = 0; i <= l; i++)
				Arrays.fill(dp[i], -1);
			System.out.printf("Case #%d: %.6f\n", cc++, f(l, 0));
		}
	}

	static class InputReader {
		private BufferedReader reader;
		private StringTokenizer tokenizer;

		public InputReader(InputStream stream) {
			reader = new BufferedReader(new InputStreamReader(stream));
			tokenizer = null;
		}

		public String next() {
			while (tokenizer == null || !tokenizer.hasMoreTokens()) {
				try {
					tokenizer = new StringTokenizer(reader.readLine());
				} catch (IOException e) {
					throw new RuntimeException(e);
				}
			}
			return tokenizer.nextToken();
		}

		public int nextInt() {
			return Integer.parseInt(next());
		}
	}

	public static void main(String[] args) throws Exception {
		(new UVa11468()).run();
	}
}

Posted November 15, 2012 by epicrado in Dynamic programming, String Processing

UVA – 10617 – Again Palindrome   1 comment


#include <stdio.h>
char s[100];
int len;
long long int dp[100][100];
long long int go(int i,int j)
{
	if(i>j)
		return 1;
	return dp[i][j]=(dp[i][j]!=-1)?dp[i][j]:go(i+1,j) + go(i,j-1)-(s[i]!=s[j])*go(i+1,j-1);
}
int main()
{
	int tc,i,j;
	scanf("%d",&tc);
	getchar();
	while(tc-->0)
	{
		gets(s);
		len = strlen(s);
		for(i=0;i<len+1;i++)
			for(j=0;j<len+1;j++)
				dp[i][j]=-1;
		printf("%lld\n",go(0,len-1)-1);
	}
}

Posted April 13, 2012 by epicrado in Dynamic programming, String Processing

UVA – 760 – DNA Sequencing   Leave a comment


import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.TreeSet;

public class DNASequencing {
	public static long mod(long a, long b) {
		return (a % b + b) % b;
	}

	public static void main(String[] args) throws Exception {
		long M = 2147483647;
		long B = 1000000007;
		long[] modPow = new long[301];
		modPow[0] = 1;
		for (int i = 1; i < modPow.length; i++)
			modPow[i] = (modPow[i - 1] * B) % M;
		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
		while (true) {
			char[] a = in.readLine().toCharArray();
			char[] b = in.readLine().toCharArray();
			long[] Ha = new long[a.length];
			long[] Hb = new long[b.length];
			Ha[0] = a[0];
			Hb[0] = b[0];
			for (int i = 1; i < Ha.length; i++)
				Ha[i] = (Ha[i - 1] * B + a[i]) % M;
			for (int i = 1; i < Hb.length; i++)
				Hb[i] = (Hb[i - 1] * B + b[i]) % M;
			TreeSet<Long>[] S = new TreeSet[b.length + 1];
			for (int i = 1; i < S.length; i++)
				S[i] = new TreeSet<Long>();
			for (int i = 0; i < b.length; i++)
				for (int j = b.length - 1; j >= i; j--) {
					long cH = Hb[j];
					if (i > 0)
						cH = mod(cH - Hb[i - 1] * modPow[j - i + 1], M);
					S[j - i + 1].add(cH);
				}
			boolean found = false;
			TreeSet<String> res = new TreeSet<String>();
			for (int len = Math.min(a.length, b.length); len >= 1 && !found; len--)
				for (int i = 0; i + len - 1 < a.length; i++) {
					int j = i + len - 1;
					long cH = Ha[j];
					if (i > 0)
						cH = mod(cH - Ha[i - 1] * modPow[j - i + 1], M);
					if (len <= b.length && S[len].contains(cH)) {
						char[] arr = new char[len];
						for (int k = i, l = 0; k <= j; k++)
							arr[l++] = a[k];
						res.add(new String(arr));
						found = true;
					}
				}
			if (res.isEmpty())
				System.out.println("No common sequence.");
			else
				while (!res.isEmpty())
					System.out.println(res.pollFirst());
			if (in.readLine() == null)
				break;
			else
				System.out.println();
		}
	}
}

Posted February 16, 2012 by epicrado in String Processing

UVA – 11475 – Extend to Palindrome   2 comments


Get longest suffix that is also a palindrome and append the rest of characters,We can check weather a substring is palindrome or not in O(1) using hashing.

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

using namespace std;
#define base 1000000007LL
#define mod 2096725343LL
char str[100005];
typedef long long ll;
int main() {
	while (gets(str)) {
		int n = strlen(str);
		int l = n - 1;
		ll h1 = 0;
		ll h2 = 0;
		ll cbase = 1;
		int best = 0;
		for (int j = l; j >= 0; j--) {
			h2 = (h2 * base + str[j]) % mod;
			h1 = (h1 + str[j] * cbase) % mod;
			if (h1 == h2)
				best = n - j;
			cbase = (cbase * base) % mod;
		}
		for (int i = 0; i < n; i++)
			putchar(str[i]);
		for (int i = (n - best) - 1; i >= 0; i--)
			putchar(str[i]);
		putchar('\n');
	}
	return 0;
}

Posted February 16, 2012 by epicrado in String Processing

UVA – 11888 – Abnormal 89’s   Leave a comment


ACC using hashing! read about it here.

#include <stdio.h>
#include <string.h>
#define N 200001
long M1 = 2147483647;
long B1 = 1000000007;
long right[200001];
long left[200001];
long modPow[200001];
char arr[200001];
int len;
long mod(long a, long b) {
	return (a % b + b) % b;
}
long calcHash_L(int l, int r) {
	long res = left[l];
	if (r < len - 1)
		res = mod(res - (left[r + 1] * modPow[r - l + 1]) % M1, M1);
	return res;
}
long calcHash_R(int l, int r) {
	long res = right[r];
	if (l > 0)
		res = mod(res - (right[l - 1] * modPow[r - l + 1]) % M1, M1);
	return res;
}
int isPalindrome(int l, int r) {
	int len = l - r + 1;
	if (len == 1)
		return 1;
	return calcHash_R(l,r) == calcHash_L(l,r);
}
int main()
{
	int i,alin,tc;
	modPow[0] = 1;
	for (i = 1; i <N; i++)
		modPow[i] = (B1 * modPow[i - 1]) % M1;
	scanf("%d",&tc);
	getchar();
	while(tc-->0)
	{
		gets(arr);
		len = strlen(arr);
		right[0] = arr[0];
		left[len - 1] = arr[len - 1];
		for (i = 1; i < len; i++)
			right[i] = (right[i - 1] * B1 + arr[i]) % M1;
		for (i = len - 2; i >= 0; i--)
			left[i] = (left[i + 1] * B1 + arr[i]) % M1;
		alin = 0;
		for (i = 1; i < len && !alin; i++) 
			alin = alin|| (isPalindrome(0, i - 1) && isPalindrome(i,len-1));
		if (alin)
			puts("alindrome");
		else if (isPalindrome(0, len - 1))
			puts("palindrome");
		else
			puts("simple");
	}
	return 0;
}

Posted February 16, 2012 by epicrado in String Processing

UVA – 11362 – Phone List   Leave a comment


Sort the strings lexicographically.then if there exists some S[i] which is a prefix of S[j] then j = i+n
and S[i] is also a prefix for all Strings between i+1 and j inclusive.so to check if there a string which is a prefix for another we should only check S[i] and S[i+1].

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Arrays;

public class PhoneList {
	public static void main(String[] args) throws Exception {
		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
		int tc = Integer.parseInt(in.readLine().trim());
		while (tc-- > 0) {
			String[] arr = new String[Integer.parseInt(in.readLine().trim())];
			for (int i = 0; i < arr.length; i++)
				arr[i] = in.readLine().trim();
			Arrays.sort(arr);
			boolean flag = false;
			for (int i = 0; i < arr.length - 1 && !flag; i++) {
				if (arr[i].length() <= arr[i + 1].length()
						&& arr[i].equals(arr[i + 1].subSequence(0,
								arr[i].length())))
					flag = true;
			}
			if (flag)
				System.out.println("NO");
			else
				System.out.println("YES");
		}
	}
}

Posted February 15, 2012 by epicrado in String Processing