\documentclass[12pt,reqno]{article}

\usepackage[usenames]{color}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{amscd}

\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}

\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}

\usepackage{color}
\usepackage{fullpage}
\usepackage{float}

\usepackage{psfig}
\usepackage{graphics,amsmath,amssymb}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{epsf}

\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{.1in}
\setlength{\evensidemargin}{.1in}
\setlength{\topmargin}{-.5in}
\setlength{\textheight}{8.9in}

\newcommand{\seqnum}[1]{\href{http://oeis.org/#1}{\underline{#1}}}

\begin{document}

\begin{center}
\epsfxsize=4in
\leavevmode\epsffile{logo129.eps}
\end{center}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}

\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

\newtheorem{algorithm}[theorem]{Algorithm}


\begin{center}
\vskip 1cm{\LARGE\bf A Space-Efficient Algorithm for Calculating \\
\vskip .05in
the Digit Distribution in the Kolakoski \\
\vskip .08in
Sequence }
\vskip 1cm
\large
Johan Nilsson\\
Fakult\"at f\"ur Mathematik\\
Universit\"at Bielefeld \\
Postfach 100131 \\
33501 Bielefeld \\
Germany \\
\href{mailto:jnilsson@math.uni-bielefeld.de}{\tt jnilsson@math.uni-bielefeld.de}\\
\end{center}

\vskip .2 in

\begin{abstract}
With standard algorithms for generating the classical Kola\-koski
sequence, the numerical calculation of the digit distribution uses
a linear amount of space. Here, we present an algorithm for calculating
the distribution of the digits in the classical Kolakoski sequence
that uses logarithmic space and still runs in linear
time. The algorithm is easily adaptable to generalized Kolakoski
sequences.
\end{abstract}



\section{Introduction}

The classical Kolakoski sequence $K=(K_{n})_{n=1}^{\infty}$ is the
unique sequence over the alphabet $\{\mathtt{1},\mathtt{2}\}$, starting
with $\mathtt{1}$ and having the sequence of run lengths identical with itself.
The classical Kolakoski sequence was first studied in a
work by Oldenburger \cite{oldenburger}, where it appears as the unique
solution to the problem of a trajectory on the alphabet
$\{\texttt{1},\texttt{2}\}$ which is identical to its exponent
trajectory. The name of the Kolakoski sequences, however, originates
from \cite{kolakoski65, kolakoski66}. In the On-Line Encyclopedia of
Integer Sequences \cite{sloane} the Kolakoski sequence has entry number
\seqnum{A000002}. The first letters of $K$ are

\begin{equation}
\label{eq: def of kolakoski}
\begin{tabular}{c}
\begin{picture}(265,40)
\put(  0,  0){$K=$}
\put( 30,  0){$\mathtt{1}$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{2}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{1}$}
\put(110,  0){$\mathtt{2}$}
\put(126,  0){$\mathtt{1}$}
\put(142,  0){$\mathtt{2}$}
\put(158,  0){$\mathtt{2}$}
\put(174,  0){$\mathtt{1}$}
\put(190,  0){$\mathtt{2}$}
\put(206,  0){$\mathtt{2}$}
\put(222,  0){$\mathtt{1}$}
\put(238,  0){$\mathtt{1}$}
\put(254,  0){$\ldots$}

\put(  0, 30){$K=$}
\put( 30, 30){$\mathtt{1}$}
\put( 54, 30){$\mathtt{2}$}
\put( 86, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(126, 30){$\mathtt{1}$}
\put(150, 30){$\mathtt{2}$}
\put(174, 30){$\mathtt{1}$}
\put(198, 30){$\mathtt{2}$}
\put(230, 30){$\mathtt{2}$}
\put(246, 30){$\ldots$}

\put( 33, 13){\line(0, 1){ 10}}
\multiput(49, 13)(32, 0){2} {\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put(113, 13){\line(0, 1){10}}
\put(129, 13){\line(0, 1){10}}
\multiput(145, 13)(0, 0){1} {\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put(177, 13){\line(0, 1){10}}
\multiput(193, 13)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\end{picture}
\end{tabular}
\end{equation}
%
There are several interesting questions, answered and unanswered, on
the properties of the classical Kolakoski sequence;  Kimberling
presents several of these in \cite{kimberling}. One of the simplest,
and yet unresolved, questions is that of the distribution of digits in
$K$. If we let $o_{n}$ be the number of $\mathtt{1}$s in $K$ up to and
including position $n$, that is $o_{n} = |\{i: K_{i}=\mathtt{1}, 1\leq
i \leq n\}|$, then the conjecture is

\begin{conjecture} 
\label{conj: distribution}
The limit $o:=\lim_{n\to\infty} \frac{o_{n}}{n}$ exists and equals $\frac{1}{2}$.
\end{conjecture}

Both parts of Conjecture \ref{conj: distribution} --- the
existence and the value --- are still open. Several aspects of the conjecture (along with other properties and questions regarding the Kolakoski sequence) are considered by Dekking in \cite{dekking79, dekking80, dekking97}; see also the survey by Sing \cite{sing2010} and further references therein.  

While an analytic solution to the conjecture remains elusive, one may at least estimate the value of $o$ numerically by generating a large number of digits of the sequence $K$. In $\cite{steinsky}$ Steinsky describes a recursion that generates the letters $K_n$ and uses it to find one such numerical estimate, for $n=3\cdot 10 ^8$. It is worth noting that a straight-forward implementation of Steinsky's recursion leads to an algorithm that either runs in exponential time or requires a linear amount of space. For some time, Steinsky's result raised doubt as to the validity of Conjecture \ref{conj: distribution}; however, subsequent work by Monteil \cite{monteil} suggested once again that the conjecture should hold. Monteil used a brute-force method, requiring linear time and linear space in $n$, to push the calculation to $n=10^{11}$. 

The brute-force, or straightforward, method to find $o_n$ generates a prefix of length $n$ of the sequence $K$, using the intuitive method suggested by (\ref{eq: def of kolakoski}). That is, starting from a suitable initial sequence, we step through and read off the symbols one by one, with each letter telling us what to write in the sequence beneath, and thus what to append to the end of the current sequence.

Here we present an algorithm to find $o_n$ that runs in linear time,
yet only uses logarithmic space.
It is thus more efficient than those presented previously, and we have used it to push the calculation further; here we present values of $o_n$ up to $n= 10^{13}$ (Table \ref{table: classical kolakoski}).
Our calculation indicates that Conjecture \ref{conj: distribution} should hold, but once again gives no definite answer. We present our algorithm in Section \ref{sec: algorithm} and state and prove the algorithm's 
time performance in Section \ref{sec: analysis}. In Section \ref{sec: general}, we briefly remark on our algorithm's adaptability to more general Kolakoski sequences, and finally in Section \ref{sec: calculations} we present the results of our calculations. 



\section{The Algorithm}
\label{sec: algorithm}



Here we present an algorithm for calculating the number of $\mathtt{1}$s and $\mathtt{2}$s in the classical Kolakoski sequence $K$ up to a position $n$. Our algorithm is more memory-efficient than the straight-forward algorithm for finding $K_{n}$; it requires only $O(\log n)$ space
(Proposition \ref{prop: space})
compared to the $\Theta(n)$ for brute-force algorithm.
Here we use the standard asymptotic notation. That is,
we write $f = O(g)$ if there is a constant $c$ such that $f(n)\leq c\,g(n)$ for all sufficiently large
$n$.  Similarly $f = \Theta(g)$ if $f = O(g)$ and $g = O(f)$.  
(For more about asymptotic notation, see \cite{cormen}.)
The running
time of our algorithm is $O(n)$ to find $o_n$ (Proposition \ref{prop: time}); this is the same as for the brute-force method.

The idea in our algorithm is that if we set out only to find $o_{n}$, we do not have to save the complete sequence up to position $n$ when stepping through the sequence $K$. As in the intuitive way of generating $K$, we look back at a previous position to see which symbol run to append. However, this previous position is itself determined by a letter even further back, and so on. If we keep track only of these positions that we ``look back at'', we can drastically reduce the amount of space needed by the algorithm.

To get a hint of how this can be done, we take as a starting point a scheme, as in (\ref{eq: def of kolakoski}). We see that the upper row defines (or conversely, may be defined as) the run lengths of the symbols in the lower one. We expand this scheme by adding more rows above and connecting each symbol to the symbol in the row above that has (via run length) generated it. In this way, we obtain a tree structure, as illustrated in Figure \ref{fig: kolakoski tree}. 


\begin{figure}
\begin{center}
\begin{tabular}{c}
\begin{picture}(265,100)
\put(  0,  0){$K=$}
\put( 30,  0){$\mathtt{1}$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{2}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{1}$}
\put(110,  0){$\mathtt{2}$}
\put(126,  0){$\mathtt{1}$}
\put(142,  0){$\mathtt{2}$}
\put(158,  0){$\mathtt{2}$}
\put(174,  0){$\mathtt{1}$}
\put(190,  0){$\mathtt{2}$}
\put(206,  0){$\mathtt{2}$}
\put(222,  0){$\mathtt{1}$}
\put(238,  0){$\mathtt{1}$}
\put(254,  0){$\ldots$}

\put(  0, 30){$K=$}
\put( 30, 30){$\mathtt{1}$}
\put( 54, 30){$\mathtt{2}$}
\put( 86, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(126, 30){$\mathtt{1}$}
\put(150, 30){$\mathtt{2}$}
\put(174, 30){$\mathtt{1}$}
\put(198, 30){$\mathtt{2}$}
\put(230, 30){$\mathtt{2}$}
\put(246, 30){$\ldots$}

\put(  0, 60){$K=$}
\put( 30, 60){$\mathtt{1}$}
\put( 70, 60){$\mathtt{2}$}
\put(118, 60){$\mathtt{2}$}
\put(150, 60){$\mathtt{1}$}
\put(174, 60){$\mathtt{1}$}
\put(214, 60){$\mathtt{2}$}
\put(230, 60){$\ldots$}

\put(  0, 90){$K=$}
\put( 30, 90){$\mathtt{1}$}
\put( 94, 90){$\mathtt{2}$}
\put(162, 90){$\mathtt{2}$}
\put(214, 90){$\mathtt{1}$}
\put(230, 90){$\ldots$}
\put( 33, 13){\line(0, 1){10}}
\multiput( 49, 13)(32, 0){2}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put(113, 13){\line(0, 1){ 10}}
\put(129, 13){\line(0, 1){ 10}}
\multiput(145, 13)( 0, 0){1}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put(177, 13){\line(0, 1){ 10}}
\multiput(193, 13)(32, 0){2}{\qbezier(0, 0)(4, 5)(8, 10)\qbezier(16, 0)(12, 5)(8, 10)}

\put( 33, 43){\line(0, 1){ 10}}
\multiput( 57, 43)( 0, 0){1}{\qbezier( 0, 0)( 8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(113, 43)( 0, 0){1}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 153, 43){\line(0, 1){ 10}}
\put( 177, 43){\line(0, 1){ 10}}
\multiput(201, 43)( 0, 0){1}{\qbezier( 0, 0)( 8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}

\put( 33, 73){\line(0, 1){ 10}}
\multiput( 73, 73)( 0, 0){1}{\qbezier( 0, 0)(12, 5)(24,10) \qbezier(48, 0)(36, 5)(24,10)}
\multiput(153, 73)( 0, 0){1}{\qbezier( 0, 0)( 6, 5)(12,10) \qbezier(24, 0)(18, 5)(12,10)}
\put(217, 73){\line(0, 1){ 10}}

\end{picture}
\end{tabular}
\caption{\label{fig: kolakoski tree}The tree structure in the Kolakoski sequence.}
\end{center}
\end{figure}


We may thus interpret the letters in the classical Kolakoski sequence $K$ as the leaves of a tree (the leaves are the symbols in the bottom row in Figure \ref{fig: kolakoski tree}). Each internal node in this tree structure is a symbol in an upper row interpreted as a run length. Each letter is connected to the letter above that has generated it (called an ancestor), and also to the letter(s) below that it generates, termed children. This tree structure continues upwards without bound as we step through the symbols of the Kolakoski sequence. However, we only need to go up in the tree until we find an ancestor, to the leaf we are currently looking at, at a left most position. 

From this point on we shall consider the sequence $K'$, defined by $K= \mathtt{12}K'$. This simplifies matters somewhat, as we do not then have to deal with the left most $\mathtt{1}$s at each height in the tree, and we do not have to deal with the leaves in the first level of the tree separately.  

The algorithm for finding $o_n$ can concisely be described as an  ``in-order 
traversal'' of this tree structure, where we start from the lower left, and where we keep track of the symbols we see in the leaves during the traversal. While traversing, we add new ancestors when needed; that is we build the tree as we traverse it. To reduce the memory requirement, we dynamically generate and keep track only of the part of the tree that we currently use for the traversal. While doing so, we store the ancestors along with an indicator that tells us which of its children we have already traversed. To this end, we introduce pointers $P_k$, which are assigned values from the set $S = \{\mathtt{1},\mathtt{2},\mathtt{11},\mathtt{22}\}$. Note that here, a run is defined as word from the set $S$. At any given time, the pointer $P_0$ holds the current run in the leaves and $P_1$ holds the ancestor to $P_0$. Similarly, any $P_k$ that has been initiated holds the ancestor to $P_{k-1}$. 

We say here that pointers ``hold'' and not ``are'' a run because $P_k$ may contain more than just the single-symbol ancestor of $P_{k-1}$, it may also contain a sibling of $P_{k}$. Here we refer to the single symbols (that is, $\mathtt{1}$s or $\mathtt{2}$s) of a two symbol run ($\mathtt{11}$ or $\mathtt{22}$) as siblings.

The algorithm can now be described as follows.

\begin{algorithm}
\label{alg: main algorithm}~

\begin{itemize}
\item[-] To increment (or to assign a new value to) the pointer $P_k$ we proceed as follows. Firstly, if $P_k$ has not been initialized, let $P_k = \mathtt{22}$. If $P_k$ contains two symbols then remove one of the symbols in $P_k$.

If, on the other hand, $P_k$ contains only one symbol, then increase $P_{k+1}$ recursively. When this increment is done, the new run to write in $P_{k}$ is of the length given by the first symbol in $P_{k+1}$ and the run to write has symbol(s) opposite to the symbol(s) previously held by $P_{k}$. Note that here we do not remove the first symbol of $P_{k+1}$ when we return from the recursion. 

\item[-] To step through the sequence $K$ (from its second symbol onwards) and calculate $o_n$, we repeatedly increment the pointer $P_0$ and keep track of the number of $\mathtt{1}$s and $\mathtt{2}$s that we see.  
\hfill$\diamond$
\end{itemize}
\end{algorithm}


Note that for a given run contained in $P_0$, the algorithm will generate only the pointers $P_1,\ldots, P_N$ to $P_0$, where the ancestor in $P_N$ is at the left most position in the sequence $K'$. (And it is this height $N$ that we shall shortly show is of the order of $\log n$ when $P_0$ holds the $n$th letter in the sequence). As we step through the algorithm, we shall see that the successive runs held by the pointer $P_0$ (and also for other $P_k$) are the symbols in the sequence $K'$. 

In pseudo-code the increment of $P_0$, (or the step by step traverse of the leaves), would be done with the recursive call of the procedure \texttt{IncrementPointer} as presented below.


\begin{verbatim}
// Increments the pointer at height k, and returns the current symbol. 
// Succesive calls to IncrementPointer(0) will return the next symbol 
// in the Kolakoski sequence from the third term onward.

int IncrementPointer(int k)
{   if(P[k] has not been initiated) 
    {   P[k] = 22
    }

    if(P[k] == 11)
    {   P[k] = 1
        return 1
    }else if(P[k] == 22)
    {   P[k] = 2
        return 2
    }else if(P[k] == 1)
    {   P[k] = increment(k+1) == 1 ? 2 : 22
        return 2
    }else if(P[k] == 2)
    {   P[k] = increment(k+1) == 1 ? 1 : 11
        return 1
    }
}
\end{verbatim}



To illustrate how the algorithm works, we now present a run through of its initial steps.

\begin{example}
Incrementing the pointer $P_{0}$ once is done through the following procedure;

\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c}
\begin{picture}(70,80)
\put(  0,  0){$K':$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 29, 29){\framebox(24,10){}}
\put( 56, 30){$:P_0$}
\end{picture}
&
\begin{picture}(70,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 45, 29){\framebox( 8,10){}}
\put( 56, 30){$:P_0$}
\put(49, 13){\line(0, 1){10}}
\end{picture}
\\
(a) & (b)
\end{tabular}
\end{center}
\caption{\label{fig: first}The first increment of the pointer $P_0$.}
\end{figure}

Figure \ref{fig: first} illustrates the first increment of the pointer $P_0$ in the algorithm. 
(a) The initiation of $P_0$. The framed symbols $\mathtt{22}$ are the contents of the pointer $P_0$. 
(b) The pointer $P_0$ holds two symbols so the first of them is discarded and the second is returned, which is the third symbol in $K$. 

\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c@{\hspace{30pt}}c}
\begin{picture}(70,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 45, 29){\framebox( 8,10){}}
\put( 56, 30){$:P_0$}
\end{picture}
&
\begin{picture}(94,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 45, 29){\framebox( 8,10){}}
\put( 56, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 37, 59){\framebox(40,10){}}
\put( 80, 60){$:P_1$}
\multiput(33, 43)(32, 0){1}{\qbezier(0, 0)(4, 5)(8,10)\qbezier(16, 0)(12, 5)(8,10)}
\end{picture}
&
\begin{picture}(102,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 61, 29){\framebox(24,10){}}
\put( 88, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 69, 59){\framebox( 8,10){}}
\put( 80, 60){$:P_1$}
\put(65, 13){\line(0, 1){10}}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)(8,10) \qbezier(16, 0)(12, 5)(8,10)}
\end{picture}
\\
(a) & (b) &(c)
\end{tabular}
\end{center}
\caption{\label{fig: second}The second increment of the pointer $P_0$.}
\end{figure}

Figure \ref{fig: second} illustrates the second increment of the pointer $P_0$ in the algorithm. 
(a) The tree structure after the first increment of $P_0$.  
(b) To continue our traverse we must generate the next leaf. This is done by looking at the ancestor of the part of the run held by $P_0$. As this ancestor does not exist we have to generate it, that is we set $P_1 = \mathtt{22}$. 
(c) The first symbol of $P_1$ already has children (that is, it generated the initial run held by $P_0$). Therefore we step to the second symbol of $P_1$. The new run to assign to $P_0$ (that is, the new leaf we traverse) is then $\mathtt{11}$, since the current symbol in $P_1$ is $\mathtt{2}$ and $P_0$ currently holds a $\mathtt{2}$. Thus the symbol to return is the first in $P_0$, a $\mathtt{1}$.



\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c}
\begin{picture}(102,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 61, 29){\framebox(24,10){}}
\put( 88, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 69, 59){\framebox( 8,10){}}
\put( 80, 60){$:P_1$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)(8,10) \qbezier(16, 0)(12, 5)(8,10)}
\end{picture}
&
\begin{picture}(102,80)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 77, 29){\framebox( 8,10){}}
\put( 88, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 69, 59){\framebox( 8,10){}}
\put( 80, 60){$:P_1$}
\put(81, 13){\line(0, 1){10}}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)(8,10) \qbezier(16, 0)(12, 5)(8,10)}
\end{picture}
\\
(a) & (b)
\end{tabular}
\end{center}
\caption{\label{fig: third}The third increment of the pointer $P_0$.}
\end{figure}

Figure \ref{fig: third} illustrates the third increment of the pointer $P_0$ in the algorithm. 
(a) The tree structure after the second increment of $P_0$.  
(b) The pointer $P_0$ holds two symbols so the first of them is discarded and the second is returned, which is the fifth symbol in $K$. 


\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c@{\hspace{30pt}}c}
\begin{picture}(110,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 77, 29){\framebox( 8,10){}}
\put( 88, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 69, 59){\framebox( 8,10){}}
\put( 80, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put( 53, 89){\framebox( 56,10){}}
\put(112, 90){$:P_2$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\end{picture}
&
\begin{picture}(124,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 77, 29){\framebox( 8,10){}}
\put( 88, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put( 93, 59){\framebox(24,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\end{picture}
&
\begin{picture}(124,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put( 93, 29){\framebox(8,10){}}
\put(104, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put( 93, 59){\framebox(24,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\put( 97, 13){\line(0, 1){10}}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\end{picture}
\\
(a) & (b) & (c)
\end{tabular}
\caption{\label{fig: fourth}The fourth increment of the pointer $P_0$.}
\end{center}
\end{figure}

Figure \ref{fig: fourth} illustrates the fourth increment of the pointer $P_0$ in the algorithm.
(a) The status of the pointers after the third increment of $P_0$. 
(b) We have used the symbols held by the pointers $P_0$ and $P_1$, but only the first in the run held by $P_2$. To generate the new contents of $P_1$, we discard the first symbol of $P_2$ and use the second one to create the new run in $P_1$, which is $\mathtt{11}$, since $P_2 = \mathtt{2}$ and $P_1$ previously held a $\mathtt{2}$. 
(c) The new run length of the run to assign to $P_0$ is now $1$ since the first symbol in $P_1$ is a $\mathtt{1}$, and the symbol to write is $\mathtt{2}$ since $P_0$ previously held a $\mathtt{2}$, which is also the symbol which returned as the sixth symbol in $K$.


\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c@{\hspace{30pt}}c}
\begin{picture}(124,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put( 93, 29){\framebox(8,10){}}
\put(104, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put( 93, 59){\framebox(24,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\end{picture}
&
\begin{picture}(124,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put( 93, 29){\framebox(8,10){}}
\put(120, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(109, 59){\framebox(8,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\end{picture}
&
\begin{picture}(124,110)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(109, 29){\framebox(8,10){}}
\put(120, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(109, 59){\framebox(8,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\put(113, 13){\line(0, 1){10}}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\put(113, 43){\line(0, 1){10}}
\end{picture}
\\
(a) & (b) & (c)
\end{tabular}
\caption{\label{fig: fifth}The fifth increment of the pointer $P_0$.}
\end{center}
\end{figure}

Figure \ref{fig: fifth} illustrates the fifth increment of the pointer $P_0$ in the algorithm.
(a) The status of the pointers after the fourth increment of $P_0$.  
(b) Since we already used the first symbol held by $P_1$, we step to the second one. 
(c) The new run to write in $P_0$ is now $\mathtt{1}$, since $P_1= \mathtt{1}$ and $P_0$ previously held the symbol $\mathtt{2}$, which is also the next symbol in $K$.


\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c}
\begin{picture}(168,140)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(109, 29){\framebox(8,10){}}
\put(120, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(109, 59){\framebox( 8,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(101, 89){\framebox( 8,10){}}
\put(112, 90){$:P_2$}
\put( 78,120){$\mathtt{2}$}
\put(146,120){$\mathtt{2}$}
\put( 77,119){\framebox(76,10){}}
\put(156,120){$:P_3$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)(4, 5)( 8, 10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)(8, 5)(16, 10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)(4, 5)( 8, 10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\put(113, 43){\line(0, 1){10}}
\multiput(57,103)(0, 0){1}{\qbezier( 0, 0)(12, 5)(24,10) \qbezier(48, 0)(36, 5)(24,10)}
\end{picture}
&
\begin{picture}(180,140)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(109, 29){\framebox(8,10){}}
\put(120, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(109, 59){\framebox( 8,10){}}
\put(120, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(134, 90){$\mathtt{1}$}
\put(158, 90){$\mathtt{1}$}
\put(133, 89){\framebox(32,10){}}
\put(168, 90){$:P_2$}
\put( 78,120){$\mathtt{2}$}
\put(146,120){$\mathtt{2}$}
\put(145,119){\framebox( 8,10){}}
\put(156,120){$:P_3$}
\multiput(33, 43)(32, 0){2}{\qbezier(0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput(41, 73)( 0, 0){1}{\qbezier(0, 0)( 8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput(97, 73)( 0, 0){1}{\qbezier(0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\put(113, 43){\line(0, 1){10}}
\multiput( 57,103)(32, 0){1}{\qbezier( 0, 0)(12, 5)(24,10) \qbezier(48, 0)(36, 5)(24,10)}
\multiput(137,103)(32, 0){1}{\qbezier( 0, 0)( 6, 5)(12,10) \qbezier(24, 0)(18, 5)(12,10)}
\end{picture}
\\
(a) & (b)
\end{tabular}
\caption{\label{fig: sixth first}The first part of the sixth increment of the pointer $P_0$.}
\end{center}
\end{figure}

Figure \ref{fig: sixth first} illustrates the first part of the sixth increment of the pointer $P_0$ in the algorithm.
(a) To increase $P_0$ we have to look at the ancestors of the run held by $P_0$. We see that we have used all symbols in all of the ancestors, therefore we have to initiate  the new pointer $P_3=\mathtt{22}$.
(b) We have already used the first symbol held by $P_3$ and therefore we step to its second symbol. The new run to assign to $P_2$ is now $\mathtt{11}$ since  $P_3=\mathtt{2}$ and $P_2= \mathtt{22}$. 


\begin{figure}[H]
\begin{center}
\begin{tabular}{c@{\hspace{30pt}}c@{\hspace{30pt}}c}
\begin{picture}(180,140)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(109, 29){\framebox(8,10){}}
\put(120, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(134, 60){$\mathtt{2}$}
\put(133, 59){\framebox( 8,10){}}
\put(144, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(134, 90){$\mathtt{1}$}
\put(158, 90){$\mathtt{1}$}
\put(133, 89){\framebox(32,10){}}
\put(168, 90){$:P_2$}
\put( 78,120){$\mathtt{2}$}
\put(146,120){$\mathtt{2}$}
\put(145,119){\framebox( 8,10){}}
\put(156,120){$:P_3$}
\multiput( 33, 43)(32, 0){2}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput( 41, 73)( 0, 0){1}{\qbezier( 0, 0)( 8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput( 97, 73)( 0, 0){1}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){10}}
\put(113, 43){\line(0, 1){10}}
\multiput( 57,103)(32, 0){1}{\qbezier( 0, 0)(12, 5)(24,10) \qbezier(48, 0)(36, 5)(24,10)}
\multiput(137,103)(32, 0){1}{\qbezier( 0, 0)( 6, 5)(12,10) \qbezier(24, 0)(18, 5)(12,10)}
\put(137, 73){\line(0, 1){ 10}}
\end{picture}
&
\begin{picture}(180,140)
\put(  0,  0){$K':$}
\put( 46,  0){$\mathtt{2}$}
\put( 62,  0){$\mathtt{1}$}
\put( 78,  0){$\mathtt{1}$}
\put( 94,  0){$\mathtt{2}$}
\put(110,  0){$\mathtt{1}$}
\put(126,  0){$\mathtt{2}$}
\put( 30, 30){$\mathtt{2}$}
\put( 46, 30){$\mathtt{2}$}
\put( 62, 30){$\mathtt{1}$}
\put( 78, 30){$\mathtt{1}$}
\put( 94, 30){$\mathtt{2}$}
\put(110, 30){$\mathtt{1}$}
\put(126, 30){$\mathtt{2}$}
\put(142, 30){$\mathtt{2}$}
\put(125, 29){\framebox(24,10){}}
\put(152, 30){$:P_0$}
\put( 38, 60){$\mathtt{2}$}
\put( 70, 60){$\mathtt{2}$}
\put( 94, 60){$\mathtt{1}$}
\put(110, 60){$\mathtt{1}$}
\put(134, 60){$\mathtt{2}$}
\put(133, 59){\framebox( 8,10){}}
\put(144, 60){$:P_1$}
\put( 54, 90){$\mathtt{2}$}
\put(102, 90){$\mathtt{2}$}
\put(134, 90){$\mathtt{1}$}
\put(158, 90){$\mathtt{1}$}
\put(133, 89){\framebox(32,10){}}
\put(168, 90){$:P_2$}
\put( 78,120){$\mathtt{2}$}
\put(146,120){$\mathtt{2}$}
\put(145,119){\framebox( 8,10){}}
\put(156,120){$:P_3$}
\multiput( 33, 43)(32, 0){2}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put( 97, 43){\line(0, 1){ 10}}
\put(113, 43){\line(0, 1){ 10}}
\multiput(129, 43)( 0, 0){1}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\multiput( 41, 73)( 0, 0){1}{\qbezier( 0, 0)( 8, 5)(16,10) \qbezier(32, 0)(24, 5)(16,10)}
\multiput( 97, 73)(32, 0){1}{\qbezier( 0, 0)( 4, 5)( 8,10) \qbezier(16, 0)(12, 5)( 8,10)}
\put(137, 73){\line(0, 1){ 10}}
\multiput( 57,103)( 0, 0){1}{\qbezier( 0, 0)(12, 5)(24,10) \qbezier(48, 0)(36, 5)(24,10)}
\multiput(137,103)( 0, 0){1}{\qbezier( 0, 0)( 6, 5)(12,10) \qbezier(24, 0)(18, 5)(12,10)}
\put(129, 13){\line(0, 1){10}}
\end{picture}
\\
(a) & (b)
\end{tabular}
\caption{\label{fig: sixth second} The second part of the sixth increment of the pointer $P_0$.}
\end{center}
\end{figure}

Figure \ref{fig: sixth second} illustrates the second part of the sixth increment of the pointer $P_0$ in the algorithm. 
(a) We have not yet used any of the symbols held by $P_2$ and therefore the current one is the first. Then the new symbol in $P_1$ is $\mathtt{2}$ since the current symbol in $P_2$ is $\mathtt{1}$ and $P_1=\mathtt{1}$.
(b) The new run to assign to $P_0$ is now $\mathtt{22}$ since the first symbol in $P_1$ is $\mathtt{2}$ and $P_0=\mathtt{1}$, and therefore we return a $\mathtt{2}$ as the next symbol in $K$. 
\qed
\end{example}


Note that the algorithm does not need to keep track of the tree structure that it steps through. The algorithm only keeps track of the current contents of the pointers $P_k$ and how many of each symbol we have got in return from each increment of the pointer $P_0$. 



\section{Run Time Analysis of the Algorithm}
\label{sec: analysis}

Let $t_n$ be the number of $\mathtt{2}$s in $K$ up to and including position $n$. That is,
$$t_{n} = |\{i: K_{i}=\mathtt{1}, 1\leq i \leq n\}|.$$ 
Recall that we have already similarly defined $o_n$ as the number of ones. By considering the subwords of $K$ of length five with the maximal and minimal possible letter distributions, that is the words
\[	\mathtt{11211} 
	\quad \textnormal{and} \quad
	\mathtt{22122},
\]
we see that we have the bounds 
\begin{equation}
\label{eq: frequency bound}
	\frac{1}{4} \leq \frac{o_n}{t_n} \leq 4
\end{equation}
for $n\geq 2$. For the analysis, let $P(n)$ be the number of pointers used by Algorithm \ref{alg: main algorithm} to calculate $o
_n$. 



\begin{proposition}
\label{prop: space}
The amount of space used by Algorithm \ref{alg: main algorithm} to find $o_n$ is logarithmic in $n$. That is, $P(n) = O(\log n)$.
\end{proposition}

\begin{proof}
Let $w_0 = \mathtt{122}$ and $w_1 = \mathtt{12211}$ and similarly let the letters in $w_k$ define the runs in  $w_{k+1}$. Then $w_k$ is a prefix of the sequence $K$ for all $k\geq0$. (The collection of the words $w_k$ is known as the Kolakoski fan.)  By the frequency bound (\ref{eq: frequency bound}) it follows that 
\[	\frac{6}{5} \leq \frac{|w_{k+1}|}{|w_{k}|} \leq \frac{9}{5} 
\]
whenever $k\geq 1$ and where $|\cdot|$ denotes the length of a word. 

This implies that if pointer $P_0$ holds the symbol at position $n$ in $K'$ then the pointer $P_1$ is at most at position $\frac{5}{6}n$ and at least at position $\frac{5}{9}n$ in the Kolakoski sequence. 
Accordingly, pointer $P_2$ is at most at position $(\frac{5}{6})^2n$ and at least at position $(\frac{5}{9})^2n$, and so on at higher levels in the tree. We only have to continue upwards in the tree until the pointer $P_k$ holds the first position in $K'$, that is $\left[(\frac{5}{6})^k n \right]=1$. This gives that the bound of the number of pointers needed to find $o_n$ is given by
\[	P(n) \leq \left\lceil \frac{1}{\log \frac{6}{5}} \, \log n \right\rceil = O(\log n), 
\]
which completes the proof.
\end{proof}


If Conjecture \ref{conj: distribution} were shown to be true, it would follow that the number of pointers needed to find $o_n$ is $P(n) \approx \frac{1}{\log \frac{3}{2}} \, \log n$.


\begin{proposition}
\label{prop: time}
Algorithm \ref{alg: main algorithm} runs in (amortized) linear time. That is, to find $o_n$ we have to do an amount of work of order $O(n)$.
\end{proposition}

\begin{proof}
Let us consider the maximal amount of work we have to do to make $n$ increments of the pointer $P_0$ (to generate $n$ symbols). 

Let $p_k(n)$ be the number of times we change the contents of pointer $P_k$ under these $n$ increments. Then the sum of the $p_k$s will give us the total amount of work we have to do. It is clear that $p_0(n) = n$, since we change $P_0$ at each increment. The other pointers do not change every time; for $k\geq1$ we make a change to $P_k$ only when $P_{k-1}$ consists of a single symbol. 

Let $a_k(n)$ be the number of times the pointer $P_k$ holds a single symbol under $n$ increments of $P_0$. Similarly let $b_k(n)$ be the number of times that $P_k$ holds two symbols under the $n$ increments of $P_0$. From the algorithm we see that to find the maximal amount of work, we have to look for the maximal number of single-symbol pointer contents, since this is what forces us to go recursively higher in the tree.
For the pointer $P_0$ it follows from (\ref{eq: frequency bound}) that we have the bounds
\[	\frac{1}{4} \leq \frac{a_0(n)}{b_0(n)} \leq 4
\]
for $n\geq 1$. For pointers higher up, we have that the number of times $P_k$ holds a single symbol is at most four times the number of times it holds two symbols plus the number of times it holds two symbols, since in the latter case $P_k$ will hold a single symbol in the next step of the algorithm. 
This gives
\[	a_k(n) \leq 4 b_k(n) + b_k(n) = 5 b_n(k)
\]
Therefore, our upper bound on the number of times a pointer holds a single symbol gives the bound on the amount of work we have to do with a pointer $P_k$ compared to the amount of work for the pointer holding the children of $P_k$. This is 
\begin{equation}
\label{eq: bound on work between pointers}
	p_{k+1}(n) \leq \frac{5}{6}\, p_k(n) 
\end{equation}
for $k\geq1$. The total amount of work we now have to do to increment the pointer $P_0$ $n$ times is therefore bounded by the initial amount of work plus the convergent geometric series obtained from (\ref{eq: bound on work between pointers})
We have
\begin{equation}
\label{eq: sum of work}
P(n) =  \sum_{i=0}^{\infty}p_i(n) \leq  n + c\log n + n\sum_{i=0}^{\infty} \left(\frac{5}{6}\right)^i \leq (7+C)n,  
\end{equation}
where $c\log n$ is the initial amount of work for each pointer before we can apply our estimates above. 
\end{proof}


\section{Generalized Kolakoski Sequences}
\label{sec: general}

In this this section we remark that our algorithm is also applicable to a general Kolakoski sequence. By a generalized Kolakoski sequence we mean a sequence that is defined as its symbols' run length, as for the classical Kolakoski sequence, but the symbols may be taken from any alphabet $\{r, s\}$, where $r$ and $s$ are natural numbers, as discussed in \cite{dekking80}. We denote a generalized Kolakoski sequence over $r$ and $s$ with $K(r,s)$ and shall assume that $K(r,s)$ starts with the symbol $r$. The classical Kolakoski sequence is then $K = K(\mathtt{1},\mathtt{2})$. 

It is known that if $r+s$ is an even number, then the letter frequency in $K(r,s)$ can be calculated; see \cite{baake, sing2002, sing2003, sing2010}. When $r+s$ is odd, the existence and the value of the letter frequencies are still unknown, but are believed to exist and equal $\frac{1}{2}$. 

Our algorithm easily adopts to count the letters in a generalized Kola\-koski sequence; we may only have to change the initiation of new pointers. By applying the same idea as in the proof of Proposition \ref{prop: space} we see that the algorithm in this case with a generalized Kolakoski sequence uses fewer pointers than for the classical Kolakoski sequence, and therefore the space requirement must again be at most logarithmic. 

Similarly, by looking at the proof of Proposition \ref{prop: time} we see that the number of times we use a pointer for a general Kolakoski sequence before having to consider its ancestor is longer than for the classical Kolakoski sequence. Therefore the bounding factor for the quoted amount of work between two consecutive levels (\ref{eq: bound on work between pointers}) must be smaller than the $\frac{5}{6}$ given for the classical Kolakoski sequence. This gives then, by summing up as in (\ref{eq: sum of work}), that the total amount of work for the generalized Kolakoski sequence is also linear in $n$. 


\section{Calculations}
\label{sec: calculations}


In Table \ref{table: classical kolakoski} we present a short output from an implementation in Java of our Algorithm \ref{alg: main algorithm} for calculating the number of $\mathtt{1}$s in the classical Kolakoski sequence. The program  was run on a standard PC. In Table \ref{table: generalized kolakoski} we present results of a calculation of the number of $\mathtt{2}$s in the generalized Kolakoski sequence $K(\mathtt{2}, \mathtt{3})$, the sequence \seqnum{A071820} in the On-Line Encyclopedia of Integer Sequences \cite{sloane}. 

We denote for the classical Kolakoski sequence the maximal deviation of the proportion of $\mathtt{1}$s from $\frac{1}{2}$ in a logarithmic decade by 
\[	D(n) = \max_{\frac{1}{10}n < i \leq n }\left|\frac{1}{2}-\frac{o_i}{i}\right|,
\]
where $o_i$ is the number of $\mathtt{1}$s up to position $i$. We can similarly define the deviation for the generalized Kolakoski sequence $K(\mathtt{2}, \mathtt{3})$.

\begin{table}[ht]
\begin{center}
\texttt{
\renewcommand{\arraystretch}{1.2}
\begin{tabular}{|*{5}{r|}}
\hline 
\multicolumn{1}{|c|}{$n$} &{\textnormal{Number of $\mathtt{1}$s}}& \multicolumn{1}{|c|}{ $P(n)$ } & \multicolumn{1}{|c|}{$D(n)$} \\ \hline 
     1 &                      1 &    &  \\ \hline
   $\mathtt{10^{ 1}}$ &                      5 &  4 & $\mathtt{1.667\cdot 10^{-1}}$ \\ \hline
   $\mathtt{10^{ 2}}$ &                     49 & 10 & $\mathtt{8.333\cdot 10^{-2}}$ \\ \hline
   $\mathtt{10^{ 3}}$ &                    502 & 16 & $\mathtt{1.351\cdot 10^{-2}}$ \\ \hline
   $\mathtt{10^{ 4}}$ &                 4\,996 & 22 & $\mathtt{3.588\cdot 10^{-3}}$ \\ \hline
   $\mathtt{10^{ 5}}$ &                49\,972 & 27 & $\mathtt{5.481\cdot 10^{-4}}$ \\ \hline   
   $\mathtt{10^{ 6}}$ &               499\,986 & 33 & $\mathtt{2.800\cdot 10^{-4}}$ \\ \hline   
   $\mathtt{10^{ 7}}$ &            5\,000\,046 & 39 & $\mathtt{3.892\cdot 10^{-5}}$ \\ \hline
   $\mathtt{10^{ 8}}$ &           50\,000\,675 & 44 & $\mathtt{2.054\cdot 10^{-5}}$ \\ \hline
   $\mathtt{10^{ 9}}$ &          500\,001\,223 & 50 & $\mathtt{8.586\cdot 10^{-6}}$ \\ \hline
   $\mathtt{10^{10}}$ &       4\,999\,997\,671 & 56 & $\mathtt{2.152\cdot 10^{-6}}$ \\ \hline
   $\mathtt{10^{11}}$ &      50\,000\,001\,587 & 61 & $\mathtt{4.453\cdot 10^{-7}}$ \\ \hline
   $\mathtt{10^{12}}$ &     500\,000\,050\,701 & 67 & $\mathtt{2.140\cdot 10^{-7}}$ \\ \hline
   $\mathtt{10^{13}}$ &  5\,000\,000\,008\,159 & 73 & $\mathtt{6.774\cdot 10^{-8}}$ \\ \hline
\end{tabular}}
\caption{\label{table: classical kolakoski}The output of the calculation of the number of $\mathtt{1}$s in the classical Kolakoski sequence. Our result here exceeds the calculations made by Steinsky up to $n = 3\cdot 10^8$, and Monteil up to $n=10^{11}$, as mentioned earlier.  The column with the number of $\mathtt{1}$s is the sequence \seqnum{A195206} in the On-Line Encyclopedia of Integer Sequences \cite{sloane}.}
\end{center}
\end{table}

\begin{table}[ht]
\begin{center}
\texttt{
\renewcommand{\arraystretch}{1.2}
\begin{tabular}{|*{5}{r|}}
\hline 
\multicolumn{1}{|c|}{$n$} & \multicolumn{1}{|c|}{\textnormal{Number of $\mathtt{2}$s}} & \multicolumn{1}{|c|}{$P(n)$}& \multicolumn{1}{|c|}{$D(n)$} \\ \hline 
                    1 &                     1 &    &          \\ \hline
   $\mathtt{10^{ 1}}$ &                     5 &  3 & $\mathtt{2.143\cdot 10^{-1}}$ \\ \hline
   $\mathtt{10^{ 2}}$ &                    51 &  6 & $\mathtt{8.333\cdot 10^{-2}}$ \\ \hline
   $\mathtt{10^{ 3}}$ &                   502 &  9 & $\mathtt{2.459\cdot 10^{-2}}$ \\ \hline
   $\mathtt{10^{ 4}}$ &                4\,995 & 11 & $\mathtt{3.318\cdot 10^{-3}}$ \\ \hline
   $\mathtt{10^{ 5}}$ &               49\,999 & 14 & $\mathtt{6.353\cdot 10^{-4}}$ \\ \hline
   $\mathtt{10^{ 6}}$ &              499\,980 & 16 & $\mathtt{8.448\cdot 10^{-5}}$ \\ \hline
   $\mathtt{10^{ 7}}$ &           4\,999\,995 & 19 & $\mathtt{2.464\cdot 10^{-5}}$ \\ \hline
   $\mathtt{10^{ 8}}$ &          50\,000\,202 & 21 & $\mathtt{7.936\cdot 10^{-6}}$ \\ \hline
   $\mathtt{10^{ 9}}$ &         499\,999\,731 & 24 & $\mathtt{3.279\cdot 10^{-6}}$ \\ \hline
   $\mathtt{10^{10}}$ &      5\,000\,005\,565 & 26 & $\mathtt{8.382\cdot 10^{-7}}$ \\ \hline
   $\mathtt{10^{11}}$ &     50\,000\,013\,114 & 29 & $\mathtt{5.606\cdot 10^{-7}}$ \\ \hline
   $\mathtt{10^{12}}$ &    499\,999\,997\,503 & 31 & $\mathtt{1.430\cdot 10^{-7}}$ \\ \hline
   $\mathtt{10^{13}}$ & 4\,999\,999\,971\,938 & 34 & $\mathtt{3.744\cdot 10^{-8}}$ \\ \hline
\end{tabular}}
\caption{\label{table: generalized kolakoski}The output of the calculation of the number of $\mathtt{2}$s in the generalized Kolakoski sequence $K(\mathtt{2},\mathtt{3})$. The column with the number of $\mathtt{2}$s is the sequence \seqnum{A195211} in the On-Line Encyclopedia of Integer Sequences \cite{sloane}.}
\end{center}
\end{table}


\section{Acknowledgement}

The author wishes to thank M. Baake, P. Bugarin, V. Terauds and P. Zeiner at Bielefeld University, Germany, for our discussions of the problem and for reading drafts of the manuscript. This work was supported by the German Research Council (DFG), via CRC 701.



\begin{thebibliography}{99}

\bibitem{baake}
M. Baake and B. Sing,
Kolakoski(3; 1) is a (deformed) model set,
\textit{Canad. Math. Bull.} \textbf{47} (2004), 168--190.

\bibitem{cormen}
T. H. Cormen, C. E. Leiserson, and R. L. Rivest,
\textit{Introduction to Algorithms},
MIT Press,
2001.

\bibitem{dekking79}
F. M. Dekking,
Regularity and irregularity of sequences generated by automata,
Expos\'e no.\ 9,
\textit{S\'em. Th. Nombres Bordeaux}, 1979--1980, pp.\ 9.01--9.10.

\bibitem{dekking80}
F. M. Dekking,
On the structure of self generating sequences,
Expos\'e no.\ 31, \textit{S\'em. Th. Nombres Bordeaux},
1980--1981, pp.\ 31.01--31.06.

\bibitem{dekking97}
F. M. Dekking,
What is the long range order in the Kolakoski sequence?, 
\textit{The Mathematics of Long-Range Aperiodic Order},
NATO Adv. Sci. Inst. Ser. C Math. Phys. Sci.
\textbf{489}, Kluwer Acad. Publ., (1997), pp.\ 115--125.

\bibitem{kimberling}
C. Kimberling, Integer sequences and arrays, \\
\url{http://faculty.evansville.edu/ck6/integer/index.html}.

\bibitem{kolakoski65} 
W. Kolakoski, 
Problem 5304: self generating runs, 
\textit{Amer. Math. Monthly} \textbf{72} (1965), 674.

\bibitem{kolakoski66} 
W. Kolakoski, 
Problem 5304, 
\textit{Amer. Math. Monthly} \textbf{73} (1966), 681--682.

\bibitem{monteil}
T. Monteil, Brute force Kolakoski, \\
\url{http://www.lirmm.fr/~monteil/blog/BruteForceKolakoski/}.

\bibitem{oldenburger}
R. Oldenburger,
Exponent trajectories in dynamical systems, 
\textit{Trans. Amer. Math. Soc.} \textbf{46} (1939), 453--466.

\bibitem{sing2002}
B. Sing,
\textit{Spektrale Eigenschaften der Kolakoski-Sequenzen}, 
Diploma thesis, Universit\"at T\"ubingen, 2002.

\bibitem{sing2003}
B. Sing,
Kolakoski-($2m$; $2n$) are limit-periodic model sets, 
\textit{J. Math. Phys.} \textbf{44} (2003), 899--912.

\bibitem{sing2010}
B. Sing,
More Kolakoski sequences,
\textit{INTEGERS}, \textbf{11B} (2011),
\href{http://www.integers-ejcnt.org/vol11b.html}{Paper A14}.

\bibitem{sloane}
N. J. A. Sloane, The On-Line Encyclopedia of Integer Sequences.
Published electronically at \url{http://oeis.org/}.

\bibitem{steinsky}
B. Steinsky,
A recursive formula for the Kolakoski sequence \seqnum{A000002},
\textit{J. Integer Sequences}, \textbf{9} (2006), 
\href{http://www.cs.uwaterloo.ca/journals/JIS/VOL9/Steinsky/steinsky5.html}{Article 06.3.7}.

\end{thebibliography}


\bigskip
\hrule
\bigskip

\noindent 2010 {\it Mathematics Subject Classification}:
Primary 68R15; Secondary 68Q25.

\noindent \emph{Keywords:} combinatorics on words,
analysis of algorithms, linear space, Kolakoski sequence.

\bigskip
\hrule
\bigskip

\noindent (Concerned with sequences
\seqnum{A000002},
\seqnum{A071820},
\seqnum{A195206}, and
\seqnum{A195211}.)

\bigskip
\hrule
\bigskip

\vspace*{+.1in}
\noindent
Received  October 12 2011;
revised versions received  March 9 2012; June 25 2012.
Published in {\it Journal of Integer Sequences},
June 26 2012.

\bigskip
\hrule
\bigskip

\noindent
Return to
\htmladdnormallink{Journal of Integer Sequences home page}{http://www.cs.uwaterloo.ca/journals/JIS/}.
\vskip .1in


\end{document}

                                                                                


