\documentclass[11pt]{article}
\usepackage{fullpage}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{times}
\usepackage{url}
\usepackage{latexsym}
\usepackage{amssymb}
\usepackage{amsthm}
\begin{document}
\title{{\bf 11-711: Algorithms for NLP}\\
Homework Assignment \#1: Finite Automata}
\author{}
\date{Out: September 15, 2015\\
Due: September 29, 2015}
\maketitle
\paragraph{Instructions.} Create a \LaTeX-typeset writeup of your solutions. For the problems that require that you give FSAs, we encourage you to create them using drawing tools, but we will accept hand-drawn figures for full credit. Make sure initial and final states are clearly identified and explicitly list all symbols on each arc (i.e., no wildcards). We ask that you do not use any automated tools to \emph{solve} these problems as you will not have access to such tools for similar problems on the exams.
\section*{Problem 1 [15 points]}
For the following NFA containing $\varepsilon$-moves,
\includegraphics[scale=0.66]{epsnfa.pdf}
\noindent give an equivalent DFA.
\section*{Problem 2 [15 points]}
\noindent Give DFAs accepting the following languages.
\begin{enumerate}
\item {\bf [5 points]} The set of strings over the alphabet $\{a,b,c\}$ in which the
substring $bc$ never occurs.
\item {\bf [5 points]} The set of strings over the alphabet $\{1,7\}$ that end in $11711$.
\item {\bf [5 points]} The set of strings over the alphabet $\{0, 1\}$ which are divisible by three
when interpreted as a binary number (ignoring leading zeroes).
For example $00000_b = 0$, which is divisible by 3, so $00000$ should be accepted.
$001001_b = 9$ and thus should be accepted also.
$101_b = 5$ is not divisible by 3 and thus should be rejected.
{\bf Note:} $\varepsilon$ should be interpreted as 0 and thus should be accepted.
\end{enumerate}
\section*{Problem 3 [10 points]}
Give a {\em non-deterministic} FSA (possibly with $\varepsilon$-moves) accepting
the following language.
\begin{enumerate}
\item The set of strings over $\{1,7\}$ ending in $11711$.
How does your NFA
differ from the DFA you constructed in Problem~2.2?
\end{enumerate}
\section*{Problem 4 [15 points]}
Give regular expressions for each of the following languages:
\begin{enumerate}
\item {\bf [7 points]} The set of strings over $\{a,b,c\}$ in which the
substring $bc$ never occurs.
\item {\bf [8 points]} The language described in Problem~2.3:
The set of strings over $\{0, 1\}$ which are divisible by three
when interpreted as a binary number (ignoring leading zeroes).
For example $00000_b = 0$, which is divisible by 3, so $00000$ should be accepted.
$001001_b = 9$, and thus should be accepted also.
$101_b = 5$, is not divisible by 3, and thus should be rejected.
{\bf Note:} $\varepsilon$ should be interpreted as 0, and thus should be accepted.
\end{enumerate}
\section*{Problem 5 [20 points]}
We may define \textbf{generalized regular expressions} (GREs) as follows:
\begin{enumerate}
\item $\emptyset$ is a GRE denoting the empty language;
\item $\varepsilon$ is a GRE denoting the language $\{ \varepsilon \}$;
\item for each $\sigma \in \Sigma$, $\sigma$ is a GRE denoting the language $\{ \sigma \}$;
\item if $\alpha$ and $\beta$ are GREs, denoting the languages $A$ and $B$, respectively, then
\begin{itemize}
\item $(\alpha \mid \beta)$ is a GRE denoting $A \cup B$;
\item $(\alpha \beta)$ is a GRE denoting $A.B$;
\item $\alpha^*$ is a GRE denoting $A^*$;
\item \textbf{[new]} $(\alpha \wedge \beta)$ is a GRE denoting $A \cap B$; and
\item \textbf{[new]} $\neg \alpha$ is a GRE denoting $\overline{A}$.
\end{itemize}
\end{enumerate}
Prove that the languages denoted by GREs are regular.
\\
\\
\noindent \textbf{Note:} you may use refer to the definitions and proofs about REs we provided in class and only deal with the two new clauses in the generalized definition.
\section*{Problem 6 [25 points]}
For each of the following statements, answer whether the claim is {\bf true}
or {\bf false}, and give a {\em short} (one- to two-sentence) explanation if true or counterexample if false.
\begin{enumerate}
\item {\bf [5 points]} Let $L_1 \subset L_2$. If $L_1$ is not regular, then $L_2$ must also be not regular.
\item {\bf [5 points]} $L = L_1 \cap L_2$. If $L_1$ and $L$ are regular languages, then $L_2$ must also be a regular language.
\item {\bf [5 points]} $L = L_1 \cup L_2$. If $L_1$ and $L$ are regular languages, then $L_2$ must also be a regular language.
\item {\bf [5 points]} $L = \bigcap_{i=1}^{\infty} L_i$. If all of the $L_i$
are regular languages, then $L$ is also a regular language.
\item {\bf [5 points]} Let $\alpha$, $\beta$ and $\gamma$ be regular expressions.
If $L(\beta \mid \alpha \gamma) \subseteq L(\gamma)$, then $L(\alpha^* \beta) \subseteq L(\gamma)$.
\end{enumerate}
\end{document}