\documentclass[12pt,reqno]{article}
\usepackage[usenames]{color}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{amscd}
\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}
\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}
\usepackage{color}
\usepackage{fullpage}
\usepackage{float}
\usepackage{psfig}
\usepackage{graphics,amsmath,amssymb}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{epsf}
\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{.1in}
\setlength{\evensidemargin}{.1in}
\setlength{\topmargin}{-.1in}
\setlength{\textheight}{8.4in}
\newcommand{\seqnum}[1]{\href{http://oeis.org/#1}{\underline{#1}}}
\begin{document}
\begin{center}
\epsfxsize=4in
\leavevmode\epsffile{logo129.eps}
\end{center}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\begin{center}
\vskip 1cm{\LARGE\bf
The Least Self-Shuffle of the \\
\vskip .1in
Thue-Morse Sequence
}
\vskip 1cm
\large
James D. Currie\footnote{The author is supported by an
NSERC Discovery grant.}\\
Department of Mathematics \& Statistics\\
University of Winnipeg \\
Winnipeg, MB R3B 2E9 \\
Canada \\
\href{mailto:j.currie@uwinnipeg.ca}{\tt j.currie@uwinnipeg.ca}
\end{center}
\vskip .2 in
\begin{abstract}
We show that the self-shuffle of Thue-Morse given by Charlier et al.\ is optimal/canonical in the sense that among self-shuffles of Thue-Morse, it has the lexicographically least directive sequence starting with 1.
\end{abstract}
\section{Introduction}
Henshall et al.\ \cite{henshall} initiated the topic of self-shuffles of finite words. They considered, in particular, closure properties of languages under self-shuffles, proving several results as well as posing open problems.
No non-empty finite word can be equal to one of its self-shuffles, but for infinite words, the question of whether a word can be written as a self-shuffle is interesting. Charlier et al.\ \cite{charlier} exhibited a self-shuffle of the Thue-Morse word. The Thue-Morse word is the fixed point of a morphism, so that we can immediately get other shuffles; the image of any self-shuffle under the morphism gives a different self-shuffle. Endrullis and Hendriks \cite{optimal} proved that there are in fact other self-shuffles; in particular, they showed that a shuffle distinct from that of Charlier et al.\ is {\em optimal} --- it switches back and forth between shuffled copies as quickly as possible. The Thue-Morse word thus allows at least two distinct families of self-shuffles.
In this note, we show that the self-shuffle of Thue-Morse given by Charlier et al.\ is optimal/canonical in a different sense: among self-shuffles of Thue-Morse, it has the lexicographically least directive sequence starting with 1.
\section{Notation}
We follow Lothaire \cite{lothaire} as a standard notational reference for combinatorics on words. Thus $|x|$ is the length of word $x$, $|x|_0$ the number of 0's in $x$, etc. If $x$ is a non-empty word, let $x'$ denote the word obtained by deleting the last letter of $x$. Thus, $(12341234)'=1234123$, for example.
Let $u$, $v$, $w$ be finite words, and let $d$ be a word over $\{0,1\}$ such that $|w|=|d|=|u|+|v|$. We define recursively what it means for $w$ to be {\it the shuffle of $u$ and $v$ directed by $d$}, written $w=u\oplus_d v$:
\begin{enumerate}
\item If $d=\epsilon$, then $w=u\oplus_d v$
\item If the last letter of $d$ is 0 then $w=u\oplus_d v$ if
\begin{enumerate}
\item $w'=u'\oplus_{d'} v$
\item The last letter of $w$ is the same as the last letter of $u$
\end{enumerate}
\item If the last letter of $d$ is 1 then $w=u\oplus_d v$ if
\begin{enumerate}
\item $w'=u\oplus_{d'} v'$
\item The last letter of $w$ is the same as the last letter of $v$
\end{enumerate}
\end{enumerate}
In other words, each letter of $w$ is read from either $u$ or $v$, and $d$ determines whether we read it from $u$ (0) or from $v$ (1). We call $d$ the {\it directive word} of the shuffle.
By $\omega$-word we mean a $1$-sided infinite word.
For $\omega$-words {\bf u}, {\bf v}, {\bf w}, {\bf d}, we extend the definition
above and write ${\bf w}={\bf u}\oplus_{\bf d} {\bf v}$ if there are arbitrarily long prefixes $\hat{u}$, $\hat{v}$, $\hat{w}$, $\hat{d}$ of {\bf u}, {\bf v}, {\bf w}, {\bf d}, respectively, such that $\hat{u}\oplus_{\hat{d}} \hat{v}=\hat{w}$.
\begin{remark}\label{prefix}
Suppose that $d_0\in\{0,1\}^*$ is a finite prefix of ${\bf d}$ and write ${\bf d}=d_0{\bf d_1}$. \begin{itemize}
\item
Let $w_0$ be the prefix of ${\bf w}$ of length $|d_0|$ and write ${\bf w}=w_0{\bf w_1}$.
\item Let $u_0$ be the prefix of ${\bf u}$ of length $|d_0|_0$ and write ${\bf u}=u_0{\bf u_1}$.
\item Let $v_0$ be the prefix of ${\bf v}$ of length $|d_0|_1$ and write ${\bf v}=v_0{\bf v_1}$.
\end{itemize}
Then
$${\bf w}={\bf u}\oplus_{\bf d}{\bf v} \Leftrightarrow \left(w_0=u_0\oplus_{d_0}v_0 \text{ and } {\bf w_1}={\bf u_1}\oplus_{\bf d_1}{\bf v_1}\right)
$$
\end{remark}
We say that an $\omega$-word {\bf w} {\it allows a non-trivial self-shuffle} if we can write ${\bf w}={\bf w}\oplus_{\bf d} {\bf w}$ for some non-constant $\omega$-word {\bf d}. Evidently, for any $\omega$-word {\bf w}, ${\bf w}={\bf w}\oplus_{0^\omega}{\bf w}={\bf w}\oplus_{1^\omega} {\bf w}$; we call these the {\it trivial self-shuffles} of {\bf w}.
Write $x \preceq y$ (resp., $x \prec y$) to say that word $x$ is no greater than (resp., less than) $y$ in the natural lexicographic order where 0 precedes 1.
Because we have the trivial self-shuffles, the lexicographically least $\omega$-word {\bf d} such that ${\bf w}={\bf w}\oplus_{\bf d} {\bf w}$ is just ${\bf d}=0^\omega$. Seeking the lexicographically least directive sequence {\it starting with 1} is a reasonable attempt to force non-trivial shuffling.
Thus, if $\omega$-word {\bf w} allows a non-trivial self-shuffle, a natural question is
\begin{quote}What is the lexicographically least $\omega$-word {\bf d} {\em with prefix 1} such that ${\bf w}={\bf w}\oplus_{\bf d} {\bf w}$?
\end{quote}
\section{Lexicographically least shuffles}
In this section, ${\bf u}$, ${\bf v}$, ${\bf w}$ will be arbitrary but fixed effectively given $\omega$-words.
\begin{lemma}
Let a word $d_0\in\{0,1\}^*$ be specified. Let $$D=\{{\bf d}\in \{0,1\}^\omega: {\bf w}={\bf u}\oplus_{\bf d} {\bf v}\}.$$ If $D\cap d_0\{0,1\}^\omega$ is non-empty, then it has a lexicographically least element.
\end{lemma}
\begin{proof} For a positive integer $n$, suppose that $d_{n-1}$ has been defined and $D\cap d_{n-1}\{0,1\}^\omega$ is non-empty. It follows that at least one of
$D\cap d_{n-1}0\{0,1\}^\omega$ and $D\cap d_{n-1}1\{0,1\}^\omega$ is non-empty. We can thus define an infinite sequence of words $\{d_n\}_{n=0}^\infty$, each $d_{n}$ an extension of $d_{n-1}$, by
\begin{equation*}d_n=\begin{cases}
d_{n-1}0,&\text{ if }D\cap d_{n-1}0\{0,1\}^\omega\text{ is non-empty};\\
d_{n-1}1,&\text{ otherwise.}
\end{cases}
\end{equation*}
Let ${\bf \bar{d}}=\lim_{n\rightarrow\infty}d_n.$ We claim that ${\bf \bar{d}}$ is the lexicographically least element of $D\cap d_0\{0,1\}^\omega.$ Each finite prefix $d_n$ of ${\bf \bar{d}}$ has been chosen to be the prefix of a word of $D$, so that ${\bf w}={\bf u}\oplus_{\bf \bar{d}} {\bf v}$. On the other hand, if for some ${\bf \hat{d}}\in D\cap d_0\{0,1\}^\omega$, ${\bf \hat{d}}\prec{\bf \bar{d}}$, consider the shortest prefix $p$ of ${\bf \hat{d}}$ which is not a prefix of $\bar{d}$. For some positive $n$, $p=d_{n-1}0$, while $d_n=d_{n-1}1$. However, this implies that $D\cap d_{n-1}0\{0,1\}^\omega$ is empty, and ${\bf \hat{d}}\notin D\cap d_0\{0,1\}^\omega$. This is a contradiction.\end{proof}
\begin{remark}\label{decidable} Suppose that $${\bf w}={\bf u}\oplus_{\bf d} {\bf v}$$
has solutions ${\bf d}\in1\{0,1\}^*$.
For a fixed prefix $w_0$ of ${\bf w}$, we can effectively determine the lexicographically least element $d_0$ of $1\{0,1\}^*$ such that there exist prefixes $u_0$ and $v_0$ of ${\bf u}$ and ${\bf v}$, respectively, such that \begin{equation}\label{finite shuffle}w_0=u_0\oplus_{d_0} v_0.\end{equation}
There are only $2^{|w_0|-1}$ candidates for $d_0$. We can check for each candidate $d_0$, and the corresponding prefixes $u_0$, $v_0$ of {\bf u}, {\bf v}, with lengths $|d_0|_0$, $|d_0|_1$, whether (\ref{finite shuffle}) is satisfied. Note that the lengths of prefixes $u_0$, $v_0$ are always at most $|w_0|$.
\end{remark}
\begin{lemma}
Suppose that ${\bf d}$ is the lexicographically least element of $1\{0,1\}^\omega$ such that $${\bf w}={\bf u}\oplus_{\bf d} {\bf v}.$$
Let ${w_0}$ be a fixed non-empty prefix of ${\bf w}$.
Let $d_0$ be the lexicographically least element of $1\{0,1\}^*$ such that there exist prefixes $u_0$ and $v_0$ of ${\bf u}$ and ${\bf v}$, respectively, such that $$w_0=u_0\oplus_{d_0} v_0.$$
Suppose $d_0\in\{0,1\}^*1$; write ${\bf w}=w'_0{\bf W}$, ${\bf u}=u_0{\bf U}$, ${\bf v}=v'_0{\bf V}$ (so that $w'_0=u_0\oplus_{d'_0} v'_0$).
Suppose that there exists an element $\delta\in1\{0,1\}^\omega$ such that
$${\bf W}={\bf U}\oplus_{\bf \delta} {\bf V}.$$
Then
$${\bf d}=d'_0{\bf \Delta},$$
where ${\bf \Delta}$ is the lexicographically least such $ \delta.$ In particular, $d_0$ is a prefix of ${\bf d}$.
\end{lemma}
\begin{proof} Since
$w'_0=u_0\oplus_{d'_0} v'_0$ and ${\bf W}={\bf U}\oplus_{\bf \delta} {\bf V},$ by Remark \ref{prefix}, we have
$${\bf w}={\bf u}\oplus_{d'_0\Delta} {\bf v}.$$
Let $d$ be the length $|w_0|$ prefix of ${\bf d}$. By the minimality of ${\bf d}$, $d\preceq d'_01$.
Since $${\bf w}={\bf u}\oplus_{\bf d} {\bf v},$$ it follows from Remark \ref{prefix} that
$$w_0=\hat{u}_0\oplus_{d} \hat{v}_0,$$
where $\hat{u}_0$ is the length $|d|_0$ prefix of ${\bf u}$, and $\hat{v}_0$ is the length $|d|_1$ prefix of ${\bf v}$. By the lexicographic minimality of $d_0$, $d_0'1=d_0\preceq d$, so that $d_0=d$.
Therefore, write ${\bf d}=d'_0\hat{\Delta}$, where $\hat{\Delta}\in 1\{0,1\}^\omega$. By Remark \ref{prefix},
$${\bf W}={\bf U}\oplus_{\hat{\Delta}} {\bf V}.$$
By the minimality of $\Delta$, $\Delta\preceq \hat{\Delta}$. However, by the minimality of ${\bf d}$, $d'_0\hat{\Delta}={\bf d}\preceq {d'_0\Delta}$. Thus $\Delta=\hat{\Delta}$, so that
${\bf d}=d'_0{\bf \Delta}.$
\end{proof}
\begin{corollary}\label{inductive step}
Let ${\bf d}$ be the lexicographically least element of $1\{0,1\}^\omega$ such that $${\bf w}={\bf u}\oplus_{\bf d} {\bf v}.$$
Suppose that for each positive integer $i$ there are finite words $W_i$, $U_i$ and $V_i$, and $\omega$-words ${\bf w}_{i}$, ${\bf u}_{i}$ and ${\bf v}_{i}$, where
\begin{itemize}
\item ${\bf w}_1={\bf w}, {\bf u}_1={\bf u} \text{ and }{\bf v}_1={\bf v},$
\item $W_i,U_i,V_i\text{ are prefixes of length 2 or more of }{\bf w}_{i},{\bf u}_{i}, {\bf v}_{i}\text{, respectively, }$
\item ${\bf w}_{i+1}=(W'_i)^{-1}{\bf w}_{i}, {\bf u}_{i+1}=(U'_i)^{-1}{\bf u}_{i}, {\bf v}_{i+1}=(V'_i)^{-1}{\bf v}_{i}.$
\end{itemize}
so that, for each $i$,
\begin{eqnarray*}
{\bf w}_{i}&=&\prod_{j=i}^\infty W'_j\\
{\bf u}_{i}&=&\prod_{j=i}^\infty U'_j\\
{\bf v}_{i}&=&\prod_{j=i}^\infty V'_j.
\end{eqnarray*}
For each $i$, let $D_i$ be the lexicographically least word starting with 1 such that
$$W_i=\hat{u}_i\oplus_{D_i} \hat{v}_i$$
for some prefixes $\hat{u}_i$ of ${\bf u}_{i}$ and $\hat{v}_i$ of ${\bf v}_{i}$.
Suppose that, for each $i$, $D_i$ ends in a 1, $\hat{u}_i=U_i$ and
$\hat{v}_i=V_i$.
Then
$${\bf d}=\prod_{i=1}^\infty D'_i.$$
\end{corollary}
\begin{proof}
This follows from the previous lemma by induction. \end{proof}
\section{The Thue-Morse word}
Consider the binary version of the Thue-Morse word (\seqnum{A001285}), namely, ${\bf t}=\mu^\omega(0)$ where $\mu(0)=01$, $\mu(1)=10$. Thus
$${\bf t}=0110100110010110\cdots$$
The length 2 factors of the Thue-Morse word are 00, 01, 10, 11. If ${\bf t}[j..j+1]=ab$, $a$, $b\in\{0,1\}$, then
$${\bf t}[8j..8j+15]=\mu^3(ab)$$ and
$${\bf t}[16j..16j+31]=\mu^4(ab).$$
It follows that $$\langle {\bf t}[8j+1..8j+8],{\bf t}[8j+5..8j+13],{\bf t}[16j+6..16j+22]\rangle$$ takes on one of 4 possible values:\vspace{.1in}
If ${\bf t}[j..j+1]=00$, then
\begin{eqnarray*}
{\bf t}[8j..8j+15]&=&0\underline{1101\overline{0010}}\overline{11010}01\\
{\bf t}[16j16j+31]&=&011010\underline{01100101100110100}110010110
\end{eqnarray*}
so that
$$\langle {\bf t}[8j+1..8j+8],{\bf t}[8j+5..8j+13],{\bf t}[16j+6..16j+22]\rangle$$
$$=\langle 11010010,001011010,01100101100110100\rangle.$$
Arguing similarly in the other three cases, we find that $$\langle {\bf t}[8j+1..8j+8],{\bf t}[8j+5..8j+13],{\bf t}[16j+6..16j+22]\rangle\in\langle U_i,V_i,W_i\rangle$$ where the values of the $U_i$, $V_i$, $W_i$ are as follows:\vspace{.1in}
\begin{center}
\begin{tabular}{|c|c|c|c|}\hline
$i$&$U_i$&$V_i$&$W_i$\\\hline
1&11010010&001011010&01100101100110100\\
2&11010011&001100101&01100101101001011\\
3&00101100&110011010&10011010010110100\\
4&00101101&110100101&10011010011001011\\\hline
\end{tabular}\vspace{.1in}
\end{center}
For each non-negative integer $j$, let $i_j\in\{1,2,3,4\}$ be the unique value such that
$$
{\bf t}[8j+1..8j+8]= U_{i_j}.$$
Let $D_1=10001110100011101$,
$D_2=10001001100111101$.
One checks that
\begin{eqnarray*}
W_1&=&U_1\oplus_{D_1}V_1\\
W_2&=&U_2\oplus_{D_2}V_2\\
W_3&=&U_3\oplus_{D_2}V_3\\
W_4&=&U_4\oplus_{D_1}V_4.
\end{eqnarray*}
For a given value of $j$, consider the $\omega$-words ${\bf U}={\bf t}[8j+1..\infty]$, ${\bf V}={\bf t}[8j+5..\infty]$, ${\bf W}={\bf t}[16j+6..\infty]$. Let the length 17 prefix of ${\bf W}$ be $W_0$. Thus $W_0\in\{W_1,W_2,W_3,W_4\}$. As per Remark~\ref{decidable}, one can determine the lexicographically least $D_0$ with prefix 1 such that $W_0=U_0\oplus_{D_0} V_0$ for some prefixes $U_0$ of $U$ and $V_0$ of $V$; we need only consider prefixes of ${\bf U}$ and ${\bf V}$ of lengths at most 17.
It is therefore a finite computation to show that whenever $W_0\in\{W_1,W_4\}$, then $D_0=D_1$ and when $W_0\in\{W_2,W_3\}$, then $D_0=D_2$. For convenience, define $\delta:\{1,2,3,4\}\rightarrow\{1,2\}$ by $\delta(1)=\delta(4)=1$, $\delta(2)=\delta(3)=2.$
Let $T_0=0110100$, the length 7 prefix of the Thue-Morse word ${\bf t}$. A short computation (feasible by hand) shows that the lexicographically least word $\Delta_0$ with prefix 1 such that $T_0=T_1\oplus_{\Delta_0}T_2$ for prefixes $T_1,T_2$ of ${\bf t}$ is $\Delta_0=1111101$.
We remark that each of $D_1$, $D_2$ and $\Delta_0$ ends in a 1.
\begin{theorem}The lexicographically least word $d$ with prefix 1 such that ${\bf t}={\bf t}\oplus_d {\bf t}$ is
$$d=111110\prod_{j=0}^\infty ({D}_{\delta(i_j)})'.$$
\end{theorem}
\begin{proof} Note that
$${\bf t}=011010\prod_{j=0}^\infty W'_{i_j}=0\prod_{j=0}^\infty U'_{i_j}=01101\prod_{j=0}^\infty V'_{i_j}.$$
The result thus follows from Corollary \ref{inductive step}.\end{proof}
\begin{remark}
One verifies that this is the shuffle given by Charlier et al.\ in \cite{charlier}.
\end{remark}
\section{Acknowledgments}
Thanks to the anonymous and thorough referee, and to Narad Rampersad,
for their comments.
\begin{thebibliography}{9}
\bibitem{charlier}
E. Charlier, T. Kamae, S. Puzynina, and L. Q. Zamboni, Self-shuffling
words, {\em J. Combin. Theory Ser. A} {\bf 128} (2014), 1--40.
\bibitem{optimal} J. Endrullis and D. Hendriks,
The optimal self-shuffle of the Thue-Morse word,
preprint available at
\url{http://www.cs.vu.nl/~diem/publication/pdf/self-shuffle.pdf},\\
2014.
\bibitem{henshall} D. Henshall, N. Rampersad, and J. Shallit,
Shuffling and unshuffling, {\em Bull. Europ. Assoc. Theoret. Comput. Sci.}
No.~107 (June 2012), 131--142.
\bibitem{lothaire} M. Lothaire, {\it Algebraic Combinatorics on Words},
Cambridge University Press, 2002.
\bibitem{thue} Axel Thue,
{\"Uber} die gegenseitige {Lage} gleicher
{Teile} gewisser {Zeichenreihen},
{\it Norske Vid. Selsk. Skr. I. Mat. Nat. Kl. Christiana} {\bf 1} (1912), 1--67.
Reprinted in {\it Selected Mathematical Papers of Axel Thue},
T. Nagell, editor, Universitetsforlaget, Oslo, 1977, pp.~413--478.
\end{thebibliography}
\bigskip
\hrule
\bigskip
\noindent 2010 {\it Mathematics Subject Classification}: Primary 68R15.
\noindent \emph{Keywords: } Word shuffling, Thue-Morse word, lexicographical order.
\bigskip
\hrule
\bigskip
\noindent (Concerned with sequence \seqnum{A001285}).
\bigskip
\hrule
\bigskip
\vspace*{+.1in}
\noindent
Received September 12 2014;
revised versions received October 11 2014; October 12 2014.
Published in {\it Journal of Integer Sequences}, November 2 2014.
\bigskip
\hrule
\bigskip
\noindent
Return to
\htmladdnormallink{Journal of Integer Sequences home page}{http://www.cs.uwaterloo.ca/journals/JIS/}.
\vskip .1in
\end{document}