\documentclass[12pt,reqno]{article}
\usepackage[usenames]{color}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{amscd}
\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}
\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}
\usepackage{color}
\usepackage{fullpage}
\usepackage{float}
\usepackage{psfig}
\usepackage{graphics,amsmath,amssymb}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{epsf}
\usepackage{enumitem}
\usepackage{pstricks, pst-node, pst-tree}
\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{.1in}
\setlength{\evensidemargin}{.1in}
\setlength{\topmargin}{-.5in}
\setlength{\textheight}{8.9in}
\newcommand{\seqnum}[1]{\href{http://www.research.att.com/cgi-bin/access.cgi/as/~njas/sequences/eisA.cgi?Anum=#1}{\underline{#1}}}
\begin{document}
\begin{center}
\epsfxsize=4in
\leavevmode\epsffile{logo129.eps}
\end{center}
\begin{center}
\vskip 1cm{\LARGE\bf Recursive Generation of $k$-ary Trees
}
\vskip 1cm
\large
K. Manes, A. Sapounakis, I. Tasoulas, and P. Tsikouras\\
Department of Informatics\\
University of Piraeus\\
18534 Piraeus\\
Greece\\
\href{mailto:kmanes@unipi.gr}{\tt kmanes@unipi.gr}\\
\href{mailto:arissap@unipi.gr}{\tt arissap@unipi.gr} \\
\href{mailto:jtas@unipi.gr}{\tt jtas@unipi.gr} \\
\href{mailto:pgtsik@unipi.gr}{\tt pgtsik@unipi.gr} \\
\end{center}
\begin{abstract}
In this paper we present a construction of every $k$-ary tree using a forest of $(k-1)$-ary trees
satisfying a particular condition. We use this method recursively for the construction of the set
of $k$-ary trees from the set of $(k-1)$-Dyck paths, thus obtaining a new bijection
$\phi$ between these two sets.
Furthermore, we introduce a new order on $[k]^*$ which is used for the full description of this bijection.
Finally, we study some new statistics on $k$-ary trees
which are transferred by $\phi$ to statistics concerning the occurrence of strings in $(k-1)$-Dyck paths.
\end{abstract}
\newtheorem{thm}{Theorem}
\newtheorem{Proposition}[thm]{Proposition}
\newtheorem{Lemma}[thm]{Lemma}
\newtheorem{Corollary}[thm]{Corollary}
%\numberwithin{equation}{section}
\newcommand{\F}{\mathcal{F}}
\newcommand{\A}{\mathcal{A}}
\section{Introduction}\label{sec:in}
The notion of $k$-ary trees has been studied extensively in the literature.
Some authors deal with the generation of $k$-ary trees using some encoding of them as integer sequences,
which are generated in a specific order
(see for example \cite{Er, KlF, KL, RR, XUT, Z}).
In another direction $k$-ary trees are related to other $k$-Catalan structures such as
staircase tilings, the tennis ball problem, noncrossing contractions and K-trees
(see for example \cite{HLM, JRZ, MSS, MSV, MN}).
Finally, there are some papers dealing with the enumeration of $k$-ary trees according to some parameters
(see for example \cite{GS, S, ZJ}).
A well known procedure for the study of trees contained in a certain set $\mathcal{T}$ is to introduce a decomposition
of these trees with respect to the size and then, using this decomposition, to rebuild $\mathcal{T}$
from trees of smaller size.
In this paper, we use a different procedure for the construction of the set $\mathcal{T}_k$ of all $k$-ary trees.
First, we present a decomposition of each $k$-ary tree to a forest of $(k-1)$-ary trees satisfying certain
properties and we show how these trees can be reconstructed from the associated forest.
Next, by introducing an operation on forests, we present a recursive construction (in terms of $k$)
of $\mathcal{T}_k$ using unary trees, thus obtaining a new bijection from $k$-ary trees to $(k-1)$-Dyck paths.
In Section \ref{sec:pre} we give some definitions and preliminary results.
In Section \ref{sec:gen} we associate every $k$-ary tree with a forest of $(k-1)$-ary trees such that
the path with ascent sequence consisting of the sizes of the trees in this forest is a Dyck path.
Conversely, every such forest generates the tree uniquely; consequently an algorithmic construction
is given.
In Section \ref{sec:un}, the method used in the previous section is applied recursively for every $k$-ary tree,
and terminates with a forest of unary trees such that the path with ascent sequence
consisting of the sizes of the trees in this forest is a $(k-1)$-Dyck path.
Conversely, this forest generates the tree uniquely, so that a new bijection $\phi$ between
$\mathcal{T}_{k}$ and the set of all $(k-1)$-Dyck paths is obtained.
In Section \ref{sec:max} we fully describe $\phi$, by introducing a new order on the set of maximal
paths of the $k$-ary tree.
Finally, in Section \ref{sec:enu} we enumerate the set $\mathcal{T}_{k}$ according to some parameters related
to the notions of the previous sections.
\section{Preliminaries}\label{sec:pre}
A $k$-ary tree, $k \geq 1$, is either the empty tree $\square$ or a vertex (or internal node), called the \emph{root} of the tree, with $k$ ordered children which are $k$-ary trees.
We define $\mathcal{T}_{k,n}$ to be the set of all $k$-ary trees with $n$ vertices,
and $\displaystyle \mathcal{T}_{k} = \bigcup_{n \geq 0} \mathcal{T}_{k,n}$.
Thus, every $T \in \mathcal{T}_k$ can be uniquely decomposed as follows:
\begin{equation}\label{ktree_decomposition}
T = \square \quad \text{or} \quad T = T_1T_2 \cdots T_k, \quad T_i \in \mathcal{T}_k, \quad i \in [k]
\end{equation}
(see Figure \ref{fig:tree_decomposition}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig1.eps}
\caption{The $k$-ary tree $T=T_1T_2\cdots T_k$.}
\label{fig:tree_decomposition}
\end{center}
\end{figure}
An empty child of a vertex is called a \emph{leaf} (or \emph{external node}) of the tree.
The \emph{size} of a $k$-ary tree $T$ is the number of its vertices and it is denoted by $s(T)$;
(see for example Figure \ref{fig:T}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig2.eps}
\caption{A $3$-ary tree of size 8.}
\label{fig:T}
\end{center}
\end{figure}
Clearly, every $T \in \mathcal{T}_{k,n}$ contains $kn+1$ nodes ($k$ children for each of the $n$ internal nodes plus the root of the tree)
and $(k-1)n+1$ leaves.
It is well known (see for example \cite{123}, p.589) that $|\mathcal{T}_{k,n}|$ is equal to the $n$-th $k$-Catalan number
\[
C_n^{(k)} = \frac{1}{kn+1}\binom{kn+1}{n} = \frac{1}{(k-1)n+1}\binom{kn}{n}.
\]
We note that $C_n^{(2)}$ are the ordinary Catalan numbers $C_n$ (\cite{OEIS}, \seqnum{A000108}).
Furthermore, the generating function
$C_k (x)= \sum_{n\geq 0}C_n^{(k)}x^n$
of the $k$-Catalan sequence satisfies the equation
\[
C_k(x) = 1 + x(C_k(x))^k,
\]
from which it can be easily shown using the Lagrange inversion formula (\cite{Deutsch}, Appendix A) that
the coefficients of $(C_k(x))^s, s \in \mathbb{N}$, are given by the formula
\[
[x^n](C_k(x))^s = \frac{s}{kn+s}\binom{kn+s}{n}.
\]
Every non-empty $(k-1)$-ary tree can be considered as a $k$-ary tree which has all its $k$-th children empty.
%A \emph{maximal $(k-1)$-ary subtree} $M$ of a $k$-ary tree $T$ is a $(k-1)$-ary subtree such that there is no other
%$(k-1)$-ary subtree of $T$ containing all the nodes of $M$. Obviously, two maximal subtrees of $T$ are disjoint.
A maximal $(k-1)$-ary subtree of a $k$-ary tree $T$ is any tree obtained by choosing a $k$-th child in $T$
(or $T$ itself) and by deleting every $k$-th child in it.
Obviously, two maximal $k-1$-ary subtrees of $T$ are disjoint;
(see for example Figure \ref{fig:T_maximal}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig3.eps}
\caption{The $3$-ary tree of Figure \ref{fig:T} and all its binary maximal subtrees.}
\label{fig:T_maximal}
\end{center}
\end{figure}
Consequently, if $T$ has $n$ vertices, then it contains $n+1$ maximal $(k-1)$-ary subtrees.
A (totally ordered) \emph{forest} of $k$-ary trees is an element $\F$ of the cartesian product $\mathcal{T}_k^\lambda$,
for some $\lambda \in \mathbb{N}^*$.
We denote, for simplicity, the forest which consists of a single tree $T$ by $T$.
%(We note that $\square$ is the non-empty forest which contains only the empty tree.)
The \emph{concatenation} of the forests $\F_1, \F_2, \ldots , \F_{\rho}$ is a new forest which consists of the trees
of each $\F_j$, $j \in [\rho]$, ordered by extending the orders of every $\F_j$, preserving at the same time
their natural order.
The \emph{length} and the \emph{size} of a forest $\F=(T_1, T_2, \ldots, T_\lambda)$ are defined respectively by
\[
|\F|= \lambda \quad \text{and} \quad s(\F) = \sum_{i=1}^{\lambda}s(T_i).
\]
%\subsection{Associated path of a forest}
A non-empty \emph{$i$-path}, $i \in \mathbb{N}^*$, is a lattice path starting at the point $(0,0)$ and consisting of steps $u_i=(1,i)$ (rises) and $d=(1,-1)$ (falls).
The empty path $\varepsilon$ is the path with no steps.
For every $i$-path $P$ we denote by $r(P)$ (respectively $f(P)$) the number of rises (respectively falls) of $P$.
An \emph{$i$-Dyck path} is an $i$-path that never falls below the $x$ axis and ends at the $x$ axis.
If $P$ is an $i$-Dyck path, then $f(P)=i\cdot r(P)$ and $P$ ends at the point $((i+1)r(P),0)$.
We will refer to a 1-path (respectively a 1-Dyck path) as a path (respectively a Dyck path);
in this case we write $u$ instead of $u_1$.
The set of all $i$-Dyck paths with $n$ rises is denoted by $\mathcal{D}_n^{(i)}$
and $\displaystyle \mathcal{D}^{(i)} = \bigcup_{n \geq 0}\mathcal{D}_n^{(i)}$.
In particular, we write $\mathcal{D}$ (respectively $\mathcal{D}_n$) instead of $\mathcal{D}^{(1)}$
(respectively $\mathcal{D}_n^{(1)}$) for the ordinary Dyck paths.
Every non empty $i$-Dyck path $P$ is written in the following form, called the \emph{first return decomposition} (for $i=1$, see \cite{Deutsch}):
\begin{equation}\label{first return decomposition}
P = u_i Q_1dQ_2d \cdots Q_idQ_{i+1}
\end{equation}
where $Q_j \in \mathcal{D}^{(i)}$, $j \in [i+1]$.
Using this decomposition and the Lagrange inversion formula, it can be easily obtained that $i$-Dyck paths with $n$ rises
are counted by $C_n^{(i+1)}$.
A simple bijection $\theta$ between the $k$-ary trees and the $(k-1)$-Dyck paths is given as follows:
\[
\theta(\square) = \varepsilon \quad \text{and} \quad \theta(T_1T_2 \cdots T_k) = u_{k-1}\theta(T_1)d\theta(T_2)d \cdots \theta(T_{k-1})d\theta(T_k).
\]
Another well known decomposition of non-empty $i$-Dyck paths which will be used in this paper is based on the length of the first ascent, i.e.,
\begin{equation}\label{first ascent decomposition}
P = u_i^\mu d Q_1 d Q_2 d \cdots Q_{\mu i}
\end{equation}
where $Q_j \in \mathcal{D}^{(i)}$ and $j \in [\mu i]$.
Every $i$-path $P$ is uniquely determined by its sequence of ascents $(l_m)_{m \in [\mu]}$, $\mu \in \mathbb{N}^*$,
according to the formula
$P = u_i^{l_1}du_i^{l_2}d \cdots u_i^{l_{\mu-1}}du_i^{l_\mu}$, where $u_i^j=u_iu_i \cdots u_i$ ($j$ times).
Clearly, if $P$ ends at the $x$ axis (as in the case of $i$-Dyck paths), then $l_\mu = 0$.
The sum of the elements of this sequence equals the number of rises in the path
and the number of its elements is one more than the number of falls;
(see for example Figure \ref{fig:ascent_sequence}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig4.eps}
\caption{The Dyck path having ascent sequence $(4,0,0,2,0,1,0,1,0)$.}
\label{fig:ascent_sequence}
\end{center}
\end{figure}
We note that the path $P = u_i^{l_1}du_i^{l_2}d \cdots u_i^{l_{\mu-1}}du_i^{l_\mu}$ is an $(i-1)$-Dyck path
if and only if the following two conditions hold:
\[
(i-1)\sum_{j=1}^{m}l_j \geq m, \, \, \, \text{for all } m \in [\mu -1] \quad \text{and} \quad
(i-1)\sum_{j=1}^{\mu}l_j = \mu -1.
\]
For every forest $\F$ we denote by $P_i(\F)$ the $i$-path with ascent sequence the sequence of sizes of the trees in $\F$.
If $i=1$ we write $P(\F)$ instead of $P_1(\F)$.
It is evident that $r(P_i(\mathcal{F})) = s(\F)$ and $f(P_i(\mathcal{F})) = |\F|-1$.
We note that if $\F$ is the concatenation of the forests $\F_1, \F_2, \ldots, \F_{\rho}$, then
\begin{equation}\label{i-concat}
P_i(\F) = P_i(\F_1) d P_i(\F_2) d \cdots P_i(\F_{\rho-1}) d P_i(\F_{\rho}).
\end{equation}
\section{Generation of $k$-ary trees from $(k-1)$-ary trees}\label{sec:gen}
For every non-empty $T \in \mathcal{T}_{k}$, $k \geq 2$, we denote by $\mathcal{F}(T)$ the forest consisting of all maximal
$(k-1)$-ary subtrees of $T$, ordered according to the first time visit (in preorder) of the trees of $\mathcal{F}(T)$ in $T$
and by $T^*$ the first component of $\F(T)$; (see for example Figure \ref{fig:forest_path}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig5.eps}
\caption{The forest $\F(T)$ and its corresponding path $P(\F(T))$ for the tree $T$ of Figure \ref{fig:T}.}
\label{fig:forest_path}
\end{center}
\end{figure}
Clearly, $T^*$ is rooted at the root of $T$ and can be obtained by deleting every
$k$-th child of $T$. If $T$ is empty then $T^*$ is empty and $\F(\square) = \square $.
It is clear that the last tree of $\F(T)$ is always the empty tree.
Using decomposition \eqref{ktree_decomposition}, we can easily check that $\F(T)$ is the concatenation
of $T^*$, $\widetilde{\mathcal{F}}(T_1)$, $\widetilde{\mathcal{F}}(T_2)$, $\ldots$, $\widetilde{\mathcal{F}}(T_{k-1})$, $\mathcal{F}(T_{k})$, where $T^* = T_1^*T_2^* \cdots T_{k-1}^*$ and
$\widetilde{\mathcal{F}}(T_j)$, $j \in [k-1]$, is $\mathcal{F}(T_{j})$ excluding $T_j^*$.
We have the following result.
\begin{Proposition}\label{forest}
For every $T=T_1T_2 \cdots T_k \in \mathcal{T}_{k}$ we have
\begin{enumerate}[label = \roman{*})]
\item $s(T^*) = 1 + \sum\limits_{j=1}^{k-1} s(T^*_j)$.
\item $s(\mathcal{F}(T)) = s(T)$.
\item $|\mathcal{F}(T)| = s(T) + 1$.
%\item $|\mathcal{F}(T^*)| = 1 + \sum\limits_{i=1}^{k} |F(T^*_i)| - \sum\limits_{i=1}^n [T_i \ne \square]$.
\item
$P(\mathcal{F}(T)) = u^{\nu}dP(\widetilde{\mathcal{F}}(T_1)) dP(\widetilde{\mathcal{F}}(T_2))
\cdots dP(\widetilde{\mathcal{F}}(T_{k-1})) d
P(\mathcal{F}(T_{k}))$, where $\nu = s(T^*)$.
\item $P(\mathcal{F}(T))$ is a Dyck path.
\end{enumerate}
\end{Proposition}
\begin{proof} The proof of $(i)$ is obvious, whereas the proof of $(iv)$ follows immediately from relation
\eqref{i-concat}. The proofs of $(ii)$, $(iii)$ and $(v)$ use induction as follows:
\begin{align*}
s(\F(T)) & = s(T^*) + \sum\limits_{j=1}^{k-1} (s(\F(T_j)) - s(T_j^*)) + s(\F(T_{k})) \\
& = s(T^*) + \sum\limits_{j=1}^{k} (s(T_j)) - \sum\limits_{j=1}^{k-1} s(T_j^*)
= 1 + \sum\limits_{j=1}^{k} s(T_j)
= s(T).
\end{align*}
\begin{align*}
|\F(T)| & = 1 + \sum\limits_{j=1}^{k-1} (|\F(T_j)| - 1) + |\F(T_{k})|
= 1 + \sum\limits_{j=1}^{k-1} s(T_j) + 1 + s(T_{k})
= 1 + s(T).
\end{align*}
Finally, since each $P(\F(T_i)), i \in [k]$ is a Dyck path,
it follows that the path
\[uP(\F(T_1)) P(\F(T_2)) \cdots P(\F(T_{k-1}))dP(\F(T_k))\]
is also a Dyck path and hence,
using the equalities $P(\F(T_j)) = u^{s(T_j^*)}dP(\widetilde{\F}(T_j))$, $j \in [k-1]$, and $(i), (iv)$,
we obtain that $P(\F(T))$ is a Dyck path.
\end{proof}
%\subsection{Recursive decomposition}
In the sequel we will show that $k$-ary trees can be generated by certain forests of $(k-1)$-ary trees.
For this, we will introduce a new decomposition of $k$-ary trees.
For $T \in \mathcal{T}_{k}$,
we denote by $Z_i$ the $k$-th child in $T$ of the $i$-th (in postorder) vertex of $T^*$.
Clearly, $T$ can be uniquely recovered by attaching each $Z_i$ as the $k$-th child to the $i$-th (in postorder)
vertex of $T^{*}$.
The trees $T^{*}, Z_1, Z_2, \ldots, Z_{\nu}$, where $\nu = s(T^*)$, form a decomposition of $T$
called the \emph{first component decomposition}; (see for example Figure \ref{fig:T_fcd}).
\begin{figure}[htb]
\begin{center}
\includegraphics[]{images/fig6.eps}
\caption{The first component decomposition of the tree of Figure \ref{fig:T}.}
\label{fig:T_fcd}
\end{center}
\end{figure}
\begin{Proposition}\label{T^*-decomposition}
For every $T \in \mathcal{T}_k$, the forest $\F(T)$ is the concatenation of the forests $T^*, \F(Z_1)$, $\F(Z_2), \ldots , \F(Z_{\nu})$.
\end{Proposition}
\begin{proof}
It is enough to show that if $X, Y$ are two trees in $\F(Z_i), \F(Z_j)$ respectively then
$X$ precedes $Y$ in $\F(T)$ if and only if $i