%\documentstyle[12pt,/usr/pope/Tex/periodsec]{article}
\documentstyle[12pt]{article}
\input papersty
\input macro
\setlength{\textwidth}{6.2in}
\setlength{\textheight}{9in}
\setlength{\oddsidemargin}{.2in}
\setlength{\topmargin}{-0.25in}
\setlength{\headheight}{0in}
\overfullrule=0pt
\def\t{{\tau}}
\def\Hb{{\bar H}}
\def\l{{\lambda}}
\def\p{{\rho}}
%\def\BR{1}
%\def\BS{2}
%\def\D{3}
%\def\DKR{4}
%\def\FO{5}
%\def\L{6}
%\def\MM{7}
%\def\O{8}
%\def\RS{9}
%\def\S{10}
%\def\W{11}
%\def\WROM{12}
\thispagestyle{empty}
\begin{document}
\begin{center}
{\Large {\bf The Distribution of Heights of Binary Trees \\
\vspace{0.1\baselineskip}
and Other Simple Trees}} \\
\vspace{\baselineskip}
{\em Philippe Flajolet}\\
\vspace{0.3\baselineskip}
INRIA, 78150 Rocquencourt \\
France \\
\vspace{\baselineskip}
{\em Zhicheng Gao} \\
\vspace{0.3\baselineskip}
Dept. of Mathematics and Statistics \\
Carleton University \\
Ottawa, Ontario K1S5B6\\
Canada \\
\vspace{\baselineskip}
{\em Andrew Odlyzko} \\
\vspace{0.3\baselineskip}
AT\&T Bell Laboratories \\
Murray Hill, New Jersey 07974 \\
United States \\
\vspace{\baselineskip}
{\em Bruce Richmond}\\
\vspace{0.3\baselineskip}
Dept. of Combinatorics and Optimization \\
University of Waterloo \\
Waterloo, Ontario, N2L3G1 \\
Canada \\
\vspace{\baselineskip}
\today \\
\vspace{1.5\baselineskip}
{\bf ABSTRACT}
\vspace{.5\baselineskip}
\end{center}
\setlength{\baselineskip}{1.5\baselineskip}
\hspace*{.25in}
The number, $B_n^{[h]}$, of rooted plane binary trees of height $\le h$ with $n$ internal
nodes is shown to satisfy
$${B_n^{[h]}-B_n^{[h-1]}\over \sum_h(B_n^{[h]}-B_n^{[h-1]})}\sim\left\{ \begin{array}{l}
2 \pi^{1/2} n^{-1/2} \b^4\sum_{m\ge 1}(m\pi)^2(2(m\pi\b)^2-3)e^{-(m\pi\b)^2}\\
2\b^{-1} n^{-1/2} \sum_{m\ge 1}m^2(2(m/\b)^2-3)e^{-(m/\b)^2}, 
\end{array}\right.
$$
uniformly for 
$\delta^{-1} ( \log n )^{-1/2} \le \b \le \delta ( \log n)^{1/2}$,
where $\b=2\sqrt{n}/h$  and $\d$ is a positive constant.
An asymptotic formula for 
$B_n^{[h]}- B_n^{[h-1]}$ is derived for $h=cn$ where $0<c<1$. 
Bounds for $B_n^{[h]}$ are also derived for large heights and small heights. 
The methods apply to any simple family of trees and the general asymptotic 
results are stated.
\clearpage
\large\normalsize
\renewcommand{\baselinestretch}{1}
\thispagestyle{empty}
\setcounter{page}{1}
\begin{center}
{\Large {\bf The Distribution of Heights of Binary Trees \\
\vspace{0.1\baselineskip}
and Other Simple Trees}} \\
\vspace{\baselineskip}
{\em Philippe Flajolet}\\
\vspace{0.3\baselineskip}
INRIA, 78150 Rocquencourt \\
\vspace{\baselineskip}
{\em Zhicheng Gao} \\
\vspace{0.3\baselineskip}
Dept. of Mathematics and Statistics \\
Carleton University \\
Ottawa, Ontario K1S5B6\\
\vspace{\baselineskip}
{\em Andrew Odlyzko} \\
\vspace{0.3\baselineskip}
AT\&T Bell Laboratories \\
Murray Hill, New Jersey 07974 \\
\vspace{\baselineskip}
{\em Bruce Richmond}\\
\vspace{0.3\baselineskip}
Dept. of Combinatorics and Optimization \\
University of Waterloo \\
Waterloo, Ontario, N2L3G1 \\
\vspace{1.5\baselineskip}
\end{center}
\setlength{\baselineskip}{1.5\baselineskip}
\section{Introduction}
\hspace*{\parindent}
We study
rooted plane trees. 
The height of a tree is the number of nodes  in a longest path from the root
node to another node.
Height is an important parameter of a tree, and so it has often been
investigated
\cite{BS,BKR,FO,L,MM,RS}.
For example, in some tree traversal algorithms,
the height of the tree is the size of the stack used by the algorithm.
In most situations in combinatorics and computer science the average height of a tree has been
of greatest interest.
However, it is often important to obtain more detailed
results about the distribution of heights,
for example to be able to estimate how often pathologically bad cases arise.
This paper proves several results in this area.

A tree is called {\em binary} if all internal nodes have
two successors. Let $B_n^{[h]}$ be the number of binary trees with $n$ internal
nodes and height $\le h$.  It is well known that 
$B_n=\sum_h(B_n^{[h]}-B_n^{[h-1]})$ is the $n$th Catalan number and that 
$$B_n={1\over \sqrt{\pi n^3}}4^n(1+O(1/n)).$$
We prove a local limit theorem for the distribution defined by the 
$B_n^{[h]}-B_n^{[h-1]}$,
assuming all binary trees with $n$ internal nodes are equally likely.
\begin{thm}
\label{th1}
For any fixed $\delta > 0$, the numbers $B_n^{[h]}$ satisfy,
as $n \to \infty$,
$${B_n^{[h]}-B_n^{[h-1]}\over B_n}\sim\left\{ \begin{array}{l}
2 \pi^{1/2} n^{-1/2} \b^4\sum_{m\ge 1}(m\pi)^2(2\b^2(\pi m)^2-3)e^{-\pi^2m^2\b^2}\\
2 \b^{-1} n^{-1/2} \sum_{m\ge 1}m^2(2m^2/\b^2-3)e^{-m^2/\b^2},
\end{array}\right.
$$
uniformly for 
$\delta^{-1} ( \log n )^{-1/2} \le \b \le \delta ( \log n )^{1/2}$,
where $\b=2\sqrt{n}/h.$
\end{thm}

Here and in the rest of the paper, $\d$ denotes a positive
constant, not necessarily the same at each occurrence.
Note that the formulas in Theorem~\ref{th1} are equivalent by Poisson's formula (cf. \cite[p.~52]{BD}). 

\begin{cor}
\label{co1}
{\rm (Flajolet and Odlyzko \cite[Theorem~B]{FO})}
The average height, $\Hb_n(B)$, of binary trees with $n$ internal nodes satisfies
$$\Hb_n(B)\sim 2\sqrt{\pi n} ~\hbox{ as } n\to \infty ~.$$
\end{cor}

In \cite{FO}, Flajolet and Odlyzko showed how their proof of Corollary~\ref{co1} could be extended by using the method of moments to a proof of
a theorem about the cumulative distribution of heights for $\beta$ and $1/ \beta$ bounded.
However, that approach is inherently incapable of yielding a wider range of validity for the approximation.
Approximations such as those of Theorem~\ref{th1} for $\beta$ and
$1/ \beta$ bounded had been obtained also for various families of trees by
ex-Soviet mathematicians (see \cite{K} for results and references), but
it is not clear to what extent those methods can be extended to cover wider
ranges of $\beta$.

Brown and Shubert \cite{BS} claimed to prove that the
approximation of Theorem~\ref{th1} is valid for $n^{3/8 + \delta} \le h \le n$.
However, there are doubts about the validity of their proof.
One problem is that they use the approach of R\'{e}nyi and Szekeres \cite{RS},
which is based on the paper of Szekeres \cite{S} on iteration of analytic functions.
Szekeres assumes in \cite{S} that the analytic functions he deals with
have real Taylor series coefficients.  That is not true
for some of the functions in \cite{BS,RS}.
Presumably the results of \cite{S} hold more generally,
but there is no proof in the literature,
although R\'{e}nyi and Szekeres \cite{RS} outline how to do this.
A more serious problem with the Brown-Shubert paper \cite{BS} is that the
proof of the key inequality (2.19)
is wrong, and that the inequality itself is incorrect.
For example, taking $\epsilon < 1/12$ and $\varphi =0$ 
in that inequality implies that the $f_k ( 1/4)$ converge 
to $\omega (1/4) =2$ exponentially in $k$,
whereas it is proved in \cite{FO} that the difference 
$f_k (1/4) - \omega (1/4)$ decreases at the rate of $1/k$.
(The proof of this fact is simple, since for $\varphi =0$ only iterations of real functions are involved.)

Our methods are a refinement of the estimates in \cite{FO}
and an adaptation of the 
arguments of R\'enyi and Szekeres \cite{RS}. 
Our proof of Theorem~\ref{th1}  is simpler
than those of R\'{e}nyi and Szekeres \cite{RS} and Brown and
Shubert \cite{BS} in that we do not need accurate estimation for the iterations
near a fixed point.
We prefer to develop the self-contained analysis of Flajolet and Odlyzko \cite{FO} since little more
needs be done. It seems that one could derive the results of Szekeres
\cite[Lemma~5]{S} for the iteration of the generating functions of simple families
of trees by developing the analysis in \cite{FO} (using techniques of 
de Bruijn \cite[p.~157]{BD}).  It is not necessary to do this for our theorems so we 
have not done so. The main advantage of the analysis of \cite{FO} is that it
is clear how the extension to simple families of trees, as defined by
Meir and Moon \cite{MM},
is done. For our paper we may extend their definition as follows :
Let $y_n^{[h]}$ be the number of trees in a family with  $n$ nodes
 and height $\le h$.
The family is {\em simple} if the generating functions
$$y^{[h]}(z)=\sum_ny_n^{[h]}z^n$$
satisfy
$$y^{[0]}(z)=0,~ y^{[h+1]}(z)=z\phi(y^{[h]}(z)).$$
We assume that the coefficients $[z^r]\phi(z)$
are bounded. Some families of trees that satisfy such functional equations are
(examples from \cite{FO})
\begin{description}
\item[(a)] the family of binary trees (in Theorem 1).
\item[(b)] the family of general planar trees; the analysis of de Bruijn,
Knuth and Rice \cite{BKR} gives the average height.
\item[(c)] the family of unary-binary trees; they appear as shapes of 
expression trees when unary as well as binary operations are allowed, 
and are counted by the Motzkin numbers.
\item[(d)] the family of 2-3 trees (unbalanced); their balanced counterparts
are a useful data structure and have been counted by Odlyzko
\cite{O}. 
\item[(e)] the family of $t$-ary trees (which appear in digital search).
\item[(f)] the family of nonplanar labelled trees. (When the derivative
of the exponential generating function is considered, this is not a simple
family in the sense of Meir and Moon.)
\end{description}
We show how the proof of Theorem 1 can be extended to derive the
following result.
\begin{thm}
\label{th2}
Consider a simple family of trees corresponding to the equation
$$y=z\phi(y),~\phi(y)=\sum c_ry^r$$
 and restrict to
$$n\equiv 1 \pmod d \hbox{  with } d=\gcd \{r: c_r\ne 0\}~.$$ 
Let
$y_n=\sum_h (y_n^{[h]} - y_n^{[h-1]} )$, $\t$ be the smallest positive solution of
$$\phi(\t)-\t \phi'(\t)=0$$
and set
$$c=  (2 \phi ( \tau ) \phi '' ( \tau ))^{1/2} / \phi ' ( \tau ) \hbox{ and } \b=2\sqrt{n}/(ch)~.$$
Then for any $\delta > 0$ we have the relation
$${y_n^{[h]}-y_n^{[h-1]}\over y_n}\sim \left\{\begin{array}{l}
2c \pi^{1/2} n^{-1/2} \b^4\sum_{m\ge 1}(m\pi)^2(2(m\pi\b)^2-3)e^{-(m\pi\b)^2}\\
2c/(\b\sqrt{n}) \sum_{m\ge 1}m^2(2(m/\b)^2-3)e^{-(m/\b)^2}
\end{array}\right.
$$
uniformly as $n \to \infty$ for $\delta^{-1} ( \log n )^{-1/2} \le \b \le \delta ( \log n )^{1/2}$.
\end{thm}
\noindent Remarks~:
\begin{enumerate}
\item If $n \not\equiv 1 \pmod d$ then $y_n^{[h]}= 0$ for all $h$.
\item From \cite{FO},
$$y_n\sim dc_1\p^{-n}n^{-3/2}, \p=\t/\phi(\t), 
c_1=\sqrt{\phi(\t)/(2\pi\phi''(\t))}.$$
\item For binary trees counted according to $n$ nodes (internal plus external)
$\phi(y)=1+y^2$, $y(z)=zB(z^2)$ \cite{FO}, so $\t=1$ and $c=\sqrt{2}$.
Also, $n$ internal
nodes gives $2n+1$ nodes in total. Our formula in Theorem~\ref{th2} with $n$ replaced by
$2n+1$ gives the formula of Theorem 1.
\item For labelled general trees, $y_n=n^{n-2}$, $\phi(y)=e^y$, so $\t=1$ and 
$c=\sqrt{2}$ and we get the R\'enyi-Szekeres formula \cite[(3.31)]{RS}.
\end{enumerate}
\begin{cor}
\label{co2}
{\rm (Flajolet and Odlyzko \cite[Theorem~S]{FO})}
The average height, $\Hb_n$, satisfies
$$\Hb_n\sim \l \sqrt{n},\quad \l=\sqrt{2\pi/(\phi(\t)\phi''(\t))}\phi'(\t)~.$$
\end{cor}


Theorems~\ref{th1} and \ref{th2} deal with the distribution of heights near the
average.
In Section~3 we consider large and small heights and prove upper bounds for them.
\begin{thm}
\label{th3}
There is a $\delta > 0$ such that the number of binary trees with $n$ internal nodes and height $h$, for $1 \le h \le n$, satisfies
$$B_n-B_n^{[h]}=O\left(B_nn^{3/2}e^{-h^2/(4n)}\right),$$ 
and
$$B_n^{[h]}=O\left(B_nn^{3/2}e^{-\d n/h^2}\right).$$
\end{thm}
\noindent Remarks~:
\begin{enumerate}
\item  By Theorem~\ref{th1}, the number of trees of height $h$ for 
$\d/\log n\le n/h^2=o(1)$ is
$$\sim {B_n h^3\over 2n^2}e^{-h^2/(4n)},$$
so the bound in Theorem~\ref{th3} is quite sharp. When $h=n$ however, 
the bound is poor. The bounds in Theorem~\ref{th4} are much better for $h=cn$,
$0 < c < 1$.
\item The proof of Theorem~\ref{th3} generalizes easily to obtain similar bounds for
any simple family.
\item It is possible to derive comparable bounds for large $h$ using the
relation between height and the number of nodes at a given altitude. The
number of nodes at a given altitude is easier to analyze than height, 
see \cite{MM} for example.
It is possible to derive an accurate estimate for the mean height
of a simple family of trees.
\end{enumerate}

We also mention in Section 2 that it is possible to
prove that $B_n^{[h]}-B_n^{[h-1]}$ is monotonic increasing then decreasing
near the peak (that is for $h^2/n,~ n/h^2\le \d\log n$). This answers a
question raised by Wimp \cite{W}.

There are other approaches to studying extremely large or small heights.
B. Pittel in an unpublished work obtained upper bounds somewhat weaker than those of Theorem~\ref{th3} for
trees of small heights by probabilistic and combinatorial arguments.
$\mbox{\L}$uczak \cite{L} found a method for rigorously extrapolating the R\'{e}nyi and Szekeres results \cite{RS} about general labeled trees to all values
of $h$ such that $h / \sqrt{n} \to \infty$.
In Section~4 we will present yet another method
and will show how to apply local limit theorems to
obtain results such as the following.
\begin{thm}
\label{th4}
\beqn
& &B_n^{[h]}-B_n^{[h-1]}\\
&\sim& {4\e^2A(\e)\over (1-\e)^2\sqrt{\pi (1+\e)n}} 
\left((1-\e)^{(1-\e)}(1+\e)^{(1+\e)}\right)^{-h/2\e}4^{n}
\eeqn
uniformly for all $h$ such that $h/n=2\e/(1+\e)$ with $\e\in [\d',~1-\d']$,
where $\d'$ is a positive constant which can be arbitrarily small and $A(\e)$
is a positive and continuous function for $\e\in [\d',~1-\d']$.
\end{thm}

\section{Densities for heights near the mean}
\hspace*{\parindent}
We begin with some notation from \cite{FO}. Let
$$B^{[h]}(z)=\sum_{n\ge 0}B_n^{[h]}z^n,\quad B(z)=\sum_{n\ge 0}B_nz^n,$$
and
\beql{eq201}
e_h(z)=(B(z)-B^{[h]}(z))/2B(z)~.
\eeq
Then
\beql{eq202}
e_{h+1}(z)=(1-\e(z))e_h(z)(1-e_h(z)), ~e_0(z)=1/2~,
\eeq
and
\beql{eq203}
B(z)=(1-\e)/2z~,
\eeq
where
$\e=\e(z)=\sqrt{1-4z}$, the determination of $\sqrt{1-4z}$ being positive for
real $z< 1/4$.
It is an easy consequence of \cite[Lemmas~1--7]{FO} that
\beql{eq204}
|e_h(z)|\le c|1-\e(z)|^h
\eeq
for some constant $c$ and all $|z|\le 1/4.$
As Brown and Shubert \cite{BS} 
point out, it is easily seen that if $z=e^{it}/4$, then $|1-\e(z)|$ is a 
decreasing function of $t$ on $0\le |t|\le \pi$ with maximum 1 at $t=0$ and
minimum $\sqrt{2}-1$ at $|t|=\pi$.

We shall, with Brown and Shubert \cite{BS},
follow R\'enyi-Szekeres \cite{RS} and investigate
$e_h(z)$ for $z=e^{it}/4$. We first consider
\beql{eq205}
|t|\le \rho^2/h^2~,
\eeq
where $\rho=\rho(h)=h^{\d}.$  
For this range of $t$ the first equality in the proof of Lemma~8 of \cite{FO} is, with $\e = \e (z) = \e (e^{it} /4)$ and
$e_h = e_h (z) = e_h (e^{it} /4 )$,
\beql{eq206}
(1-\e)^h/e_h=(1-(1-\e)^h)/\e+O(\log |\e|^{-1})~,
\eeq
and this estimate is uniform in $\{z=e^{it}/4: |t|\le \d\}$. 
All the bounds below will be uniform in the 
range of $t$ in \eqn{eq205}. We have
\beql{eq207}
\e=\sqrt{-it}\sqrt{1+it/2+\cdots}=\sqrt{-it}(1+O(\rho^2/h^2))=O(\rho/h)~.
\eeq
Now
\beql{eq208}
(1-\e)^h=\exp (-h\e+O(h\e^2))=\exp (-h\e)\left(1+O(h|\e|^2)\right)~.
\eeq
From \eqn{eq206}, \eqn{eq207}, and \eqn{eq208},
we see that for $h^{-20} \le |t| \le \rho^2 /h^2$,
\begin{eqnarray}
\label{eq209}
e_h(z)&=&{e^{-h\e}(1+O(\rho^2/h))\over (1-e^{-h\e}(1+O ( h | \e |^2)))/\e
+O(\log |\e|^{-1})}\nonumber\\
&=&{\e e^{-h \e}\over 1-e^{-h \e}} + O ( \rho^2 /h^2) ~.
\end{eqnarray}
Setting $it=4\t^2/h^2$, we obtain from \eqn{eq207} and \eqn{eq209},
again for $h^{-20} \le |t| \le \rho^2 /h^2$,
\begin{eqnarray}
\label{eq2010}
e_h(z)&=&{i2\t\over h}{e^{-i2\t}\over 1-e^{-i2\t}}
+O(\rho^2/h^2)\nonumber\\
&=&{\t\over h}(\cot(\t)-i)+O(\rho^2/h^2)~.
\end{eqnarray}

Using \eqn{eq202}, \eqn{eq207}, and \eqn{eq2010}, we obtain the estimate, valid for $h^{-20} \le |t| \le \rho^2 /h^2$, 
\beql{eq2011}
e_{h-1}(e^{it}/4)-e_h(e^{it}/4)={\t^2\over h^2\sin^2(\t)}
+O(\rho^3/h^3)~.
\eeq
Noting 
\beql{eq2012}
B(z)=2e^{-it}(1-\e)=2(1-i2\t/h)+O(\rho^2/h^2)~,
\eeq
we obtain from \eqn{eq201} and \eqn{eq2011} that
for $h^{-20} \le |t| \le \rho^2 /h^2$,
\beql{eq2013}
B^{[h]}(e^{it}/4)-B^{[h-1]}(e^{it}/4)={4\t^2\over h^2\sin^2(\t)}
+O(\rho^3/h^3)~.
\eeq
This agrees with \cite[(2.33)]{BS}.
(Note our $\t=\xi/2$.)
Now we write
\begin{eqnarray*}
B_n^{[h]}-B_n^{[h-1]}&=& {1\over 2\pi i}\int_{|z|=1/4}\left(B^{[h]}(z)
-B^{[h-1]}(z)\right)z^{-n-1}dz\\
&=&{4^n\over 2\pi}\int_{-\pi}^{\pi}\left(B^{[h]}(e^{it}/4)-B^{[h-1]}(e^{it}/4)
\right)e^{-int}dt~.
\end{eqnarray*}
Using \eqn{eq204} and the comments immediately following \eqn{eq204}, we obtain
\begin{eqnarray}
\label{eq2014}
& &B_n^{[h]}-B_n^{[h-1]}\nonumber\\
&=&{4^n\over 2\pi}\int_{|t|\le \rho^2/h^2}\left(B^{[h]}(e^{it}/4)
-B^{[h-1]}(e^{it}/4)\right)e^{-int}dt+O(4^n(1-\rho/(2h))^h)\nonumber\\
&=&{4^n\over 2\pi}\int_{|t|\le \rho^2/h^2}\left(B^{[h]}(e^{it}/4)
-B^{[h-1]}(e^{it}/4)\right)e^{-int}dt+O(4^ne^{-\rho/2}).
\end{eqnarray}
We now use \eqn{eq2013} for $h^{-20} \le |t| \le \rho^2 / h^2$,
and the bound \eqn{eq204} for $|t| \le h^{-20}$.
(Eq.~\eqn{eq2013} can be shown to be valid for all $|t| \le \rho^2 /h^2$,
but we do not need to use this.)
When we
substitute $4\t^2/h^2=it$,  we can rewrite \eqn{eq2014} as
\begin{eqnarray*}
& &B_n^{[h]}-B_n^{[h-1]}\nonumber\\
&=&{4^{n+2}\over \pi ih^4}\int_{\G}
\{ \t^3e^{-4n\t^2/h^2}/\sin^2(\t) \} d\t+O(4^n(\rho^5/h^5+e^{-\rho/2}))~,
\end{eqnarray*}
where
$$\G=\{xe^{-i\pi/4}: x \hbox{ from } \rho/2 \hbox{ to } 0\}\cup
\{xe^{i\pi/4}: x\hbox{ from } 0 \hbox{ to } \rho/2\}~.$$
It now follows by a standard argument that 
\begin{eqnarray}
\label{eq2015}
& &B_n^{[h]}-B_n^{[h-1]}\nonumber\\
&=&{32\cdot 4^n\over h^4}~\sum_{p= 1}^{[\rho/2\pi]}
{\rm Res}(\t^3e^{-\b^2\t^2}/\sin^2\t, \t=p\pi)\nonumber\\
& &+O(4^n(\rho^5/h^5+e^{-\rho/2}+e^{-\b^2\rho}/h^4))\nonumber \\
&=&{32\cdot 4^n \over h^4}~\sum_{m= 1}^{[\rho/2\pi]}(2\b^2(m\pi)^4-3(m\pi)^2)
e^{-(m\pi\b)^2}\\
& &+O(4^n(\rho^5/h^5+e^{-\rho/2}+e^{-\b^2\rho}/h^4)),\nonumber
\end{eqnarray}
where $\b=2\sqrt{n}/h$ and $O(e^{-\b^2\rho})$ comes from the integral
along the arc
$$\{(\rho/2)e^{i\theta}: -\pi/4\le \theta\le \pi/4\}.$$
Noting $\rho=h^{\d}$, we obtain 
\beql{eq2016}
B_n^{[h]}-B_n^{[h-1]}\sim
{32\cdot 4^n\over h^4}~\sum_{m\ge 1}(2\b^2(m\pi)^4-3(m\pi)^2)e^{-(m\pi\b)^2}
\eeq
uniformly for $\d\le \b \le \d\sqrt{\log n}$.  
Using Poisson's formula (cf. \cite[p.~52]{BD}), we have
\beql{eq2017}
B_n^{[h]}-B_n^{[h-1]}\sim
{4^n 32\over \sqrt{\pi}\b^5h^4}\sum_{m\ge 1}m^2(2(m/\b)^2-3)e^{-(m/\b)^2}
\eeq
uniformly for $\d\le 1/\b\le \d\sqrt{\log n}.$
Now Theorem~\ref{th1} follows from \eqn{eq2016}, \eqn{eq2017} and the classical formula
$$B_n\sim {4^n\over \sqrt{\pi n^3}}(1+O(1/n)).$$

Using \eqn{eq2015} and Poisson's formula, one can easily deduce
\beql{eq2018}
{B_n^{[h]}-B_n^{[h-1]}\over B_n}=O\left(n^{-1/2-\d}\right)
\eeq
for $h \ge \delta^{-1} n^{1/2} ( \log n )^{-1/2}$ or
$h \le \delta n^{1/2} ( \log n )^{1/2}$,
which, together with Theorem~\ref{th1}, gives Corollary~\ref{co1}.

We now give the proof of Theorem~\ref{th2}, relying heavily on the results of
\cite[Section~6]{FO}.
There we find that
$$[z^n]y(z)\sim dc_1\p^{-n}n^{-3/2},~c_1=\sqrt{\phi(\t)/(2\pi\phi''(\t))}.$$
Furthermore, if 
$$e_h(z)=y(z)-y^{[h]}(z)$$
then
$$e_{h+1}(z)=(1-\e)e_h(z)(1-\phi''(\t)e_h(z)/2\phi'(\t))+O(|e_h^2(z)|
+|e_h(z)||y-\t|),$$
where
$$\e(z)=1-z\phi'(y)=dc(1-z/\p)^{1/2}+O((y-\t)^2),$$
$$c= (2 \phi ( \tau ) \phi '' ( \tau ))^{1/2} / \phi ' ( \tau ) ,\hbox{ and } (y-\t)^2=O(1-z/\p).$$
The analysis is now similar to that for binary trees. We find that
$$e_h(z)=c_2{(1-\e(z))^h\over (1-(1-\e(z))^h)/\e(z)+O(\log |\e(z)|^{-1})},$$
where $c_2=2\phi'(\t)/\phi''(\t).$
The analysis of this paper is now easily extended. We consider $z=\p e^{it}$,
$|t|\le h^{\d-2},$  $\b=2\sqrt{n}/(ch)$  and find that 
$${y_n^{[h]}-y_n^{[h-1]}\over y_n}\sim
2c \pi^{1/2} n^{-1/2}
\b^4
\sum_{m\ge 1}(m\pi)^2(2(m\pi\b)^2-3)e^{-(m\pi\b)^2}~,$$
which gives Theorem~\ref{th2}.

The proof of Corollary~\ref{co2} is essentially the same as that of Corollary~\ref{co1}.

\section{Bounds for large and small heights}
\hspace*{\parindent}
First we make some observations about extending the range of $h$
for which we can obtain asymptotic estimates.
Note that given an approximation for $y^{[h]}(z)$ of the form
\begin{eqnarray}
\label{eq301}
& &c_0+c_1(\t)/h+(c_2(\t)\log h+c_3(\t))/h^2\nonumber\\
&+&(c_4(\t)\log ^2 h+c_5(\t)\log h +c_6(\t))/h^3+\cdots
\end{eqnarray}
it is possible to use the method of variable coefficients as do R\'enyi and 
Szekeres to find $c_i(\t)$ explicitly. One can then obtain an approximation
for $y^{[h]}-y^{[h-1]}$ to an accuracy of
$$(\hbox{polynomial in $\log h$ of degree $l-1$})/h^l~.$$
From such an approximation it is possible to obtain asymptotic estimates
for
$y^{[h]}(z)-y^{[h-1]}(z)$ for $h$ in the range
$$h^2/n,~ n/h^2\le c_h\log n,\quad c_h\to \infty\hbox{ with } h.$$
It is possible to derive the required approximations for $y^{[h]}(z)$, but 
it is tedious to do so. The approximations seem complicated.

It may be more interesting to observe that if such an approximation is
determined to $O((\log h)^mh^{-3})$ accuracy it is easy to show the second
difference of $y_n^{[h]}$ has the same sign as the derivative of the 
density functions in Theorems~\ref{th1} and \ref{th2}. To do this is rather routine so we 
simply 
sketch the proof. Eq.~\eqn{eq301} allows us to refine \eqn{eq2013} and hence \eqn{eq2016}.
(It suffices to know the $h^{-2}$ term in \eqn{eq301}.) The 
$$(c_2(\t)\log h+c_3(\t))/h^2$$
term in \eqn{eq301} gives a complicated expression that can be evaluated as a
sum of residues when it is used to refine \eqn{eq2013}. The higher terms in \eqn{eq301} 
are negligible as we shall see. 

If $h$ is incremented by one then the summation in \eqn{eq2016} is altered by its 
derivative plus a second derivative term. The derivative of $\b^2$ is of the 
form
$cnh^{-3}$, $c$ a constant. For the relevant $h$, that is near $\sqrt{n}$,
such a term introduces a factor of $h^{-1}$, a second derivative term a factor
of $h^{-2}$. Thus
$$(y_n^{[h]}-2y_n^{[h-1]}+y_n^{[h-2]})/y_n$$ 
is asymptotically the derivative of the density function of the 
$\Theta$-distribution of Theorem 2 plus terms smaller by a factor of $h^{-1}$. 
Since the limiting $\Theta$-distribution is unimodal (see the graph in 
Brown and Shubert \cite{BS}), our claim about the unimodality follows.
It would be possible in principle to derive asymptotic expressions to any
accuracy of the form $n^{-M}$ for $\Hb_n$ from \eqn{eq301}. These expressions become
complicated. 

It is possible to derive upper bounds for
$y_n^{[h]}$ for $h$ far away from $\sqrt{n}$. We illustrate with $B_n^{[h]}$.
Since the coefficients of $B(z) - B^{[h]} (z)$ are $\ge 0$, we
find that for every real $z$, $0 < z < 1/4$,
$$B_n - B_n^{[h]} \le
(B(z) - B^{[h]} (z)) z^{-n} ~,$$
and so by \eqn{eq204},
\beql{eq302}
B_n - B_n^{[h]} = O((1- \e (r))^h (1- \e^2 (r))^{-n} 4^n)
\eeq
for $0 < r < 1/4$.
To minimize the expression in 
\eqn{eq302} we set the logarithmic derivative with respect to $\e$ to zero. We find
$$h/(1-\e)=2n\e/(1-\e^2),\quad \e=h/(2n-h).$$
So
$$(1-\e)^h(1-\e^2)^{-n}=O\left((1-\e)^{(1-\e)/2\e}(1+\e)^{(1+\e)/2\e}\right)
^{-h}~.$$
Now the first part of Theorem~\ref{th3} follows from 
$$(1-\e)^{(1-\e)/2\e}(1+\e)^{(1+\e)/2\e}\ge e^{\e/2}~.$$


The second part can be derived by an argument similar to that 
of Wright, Richmond, Odlyzko and McKay \cite{WROM}. 
For $h\ge 2$, let $x_h$ be defined by 
$$B^{[h]}(x_h)=B(1/4)=2~.$$ 
It is clear that $1=x_2>x_3>\cdots>1/4.$
We use $Df(u)$ to denote the derivative of $f(z)$ at $z =u$.
\begin{lemma}
\label{le1}
There exist  positive constants $h_0$ and $c>0$ such that for $h\ge h_0$,
$$DB^{[h]}(1/4)\ge ch ~.$$
\end{lemma}
\proof
The proof is by induction on $h$. 
The claim is clearly true for $h \le 8$ if $c$ is small enough.
Suppose it is true for $h$.
From \cite[Eq.~(8)]{FO}
$$1/(4h)< e_h(1/4)<1/h~,$$
and hence
\beql{eq303}
1/h< 2-B^{[h]}(1/4)<4/h~.
\eeq
Using \eqn{eq303} and
$$B^{[h+1]}(z)=1+z(B^{[h]}(z))^2~,$$
we obtain
\begin{eqnarray*}
DB^{[h+1]}(1/4)&=&(B^{[h]}(1/4))^2+(1/2)B^{[h]}(1/4)DB^{[h]}(1/4)\\
&\ge & (2-4/h)^2+(1-2/h)DB^{[h]}(1/4)\ge
4- 16/h + (1-2/h) ch \ge c(h+1)
\end{eqnarray*}
provided $h\ge h_0 =8$ and $c \le 1/6$.
\qed

\begin{lemma}
\label{le2}
There exist positive constants $C$ and $h_1$ such that for $h\ge h_1$
\beql{eq304}
DB^{[h]}(x_h)\le Ch 
\eeq
and 
\beql{eq305}
  x_h\ge 1/4+1/(Ch^2)~.
\eeq
\end{lemma}
\proof
Using Lemma~\ref{le1}, \eqn{eq303} and 
$$2-B^{[h]}(1/4)=B^{[h]}(x_h)-B^{[h]}(1/4)\ge DB^{[h]}(1/4)(x_h-1/4)~,$$
we have
\beql{eq306}
x_h-1/4\le 4/(hDB^{[h]}(1/4))\le 4/(ch^2)~.
\eeq
Now for $h_1\ge 8/c$ and $C\ge \max \{8,DB^{[h_1]}(1/4)/h_1\}$,
\beqn
DB^{[h+1]}(x_{h+1})&\le &4+4x_hDB^{[h]}(x_h)\\
&\le & 4+(1+4/(ch^2))Ch\le C(h+1).
\eeqn
So \eqn{eq304} follows by induction.
Now \eqn{eq305} follows from \eqn{eq303}, \eqn{eq304}  and
$$2-B^{[h]}(1/4)=B^{[h]}(x_h)-B^{[h]}(1/4)\le DB^{[h]}(x_h)(x_h-1/4)~.~~ \mbox{\qed}$$

We now complete the proof of Theorem~\ref{th3}. From Lemma~\ref{le2}
$$x_h\ge 1/4+1/(Ch^2)~.$$
Hence
\begin{eqnarray*}
B_n^{[h]}&\le & B^{[h]}(x_h)x_h^{-n}\\
&=&O\left( 4^{-n}(1+4/(Ch^2))^{-n}\right)=O\left(B_nn^{3/2}e^{-\d n/h^2}\right).
\end{eqnarray*}

\section{Trees with large heights}
\hspace*{\parindent}
In this section, we use the local limit theorem
of Bender and Richmond
\cite[Th.~2]{BR} to obtain an
 asymptotic formula for the number of trees with $n$ internal nodes and
heights $h=cn$ where $0<c<1$. 
Let $\d$ and $\theta$ be some positive constants which can be arbitrarily small,
and define
$$R=[\d,~1/4-\d]\  \hbox{ and }\  N(R)=\{z=re^{it}:r\in R,|t|\le \theta\}~.$$
It was shown in \cite{FO} that
$$e_h(1/4)=O(1/h)~.$$
For $|z|\le 1/4$, we have 
$$2|B(z)e_h(z)|=|B(z)-B^{[h]}(z)|\le B(1/4)-B^{[h]}(1/4)~,$$
and hence
$$|e_h(z)|\le B(1/4)e_h(1/4)/|B(z)|~.$$
Since $|B(z)|>0$ for $|z|\le 1/4$,
\beql{eq307}
e_h(z)=O(1/h)\  \hbox{ uniformly for }\   |z|\le 1/4~.
\eeq
It is easy to check that 
\beql{eq308}
|1-\e(z)|<|1-\e(|z|)|<1\hbox{ for } z\ne |z|, |z|<1/4~.
\eeq
Using \eqn{eq307}, \eqn{eq308} and \cite[(7)]{FO}, we obtain
\beql{eq309}
(1-\e)^h/e_h\sim 1/\e +2+\sum_{j\ge 0} {e_j\over 1-e_j}(1-\e)^j
\eeq
uniformly for $|z|\in N(R)$. 
Let
\beql{eq3010}
A(z)=\left(1/\e +2+\sum_{j\ge 0} {e_j\over 1-e_j}(1-\e)^j\right)^{-1}~.
\eeq
We know from \cite{FO} that $A(z)$ is continuous in $N(R)$.
It follows from \eqn{eq309} and \eqn{eq201} that
$$ B(z)-B^{[h]}(z)\sim 2A(z)B(z)(1-\e(z))^h $$
and hence 
\beql{eq3011}
B^{[h]}(z)-B^{[h-1]}(z)\sim {2\e(z)\over 1-\e(z)}A(z)B(z)(1-\e(z))^h
\eeq
uniformly for $z\in N(R)$.

Now we compute
\beql{eq3012}
\mu={d\log (1-\e)\over d\log z}={1+\e\over 2\e}
\eeq
and
\beql{eq3013}
\si^2={d^2\log (1-\e)\over (d\log z)^2}={1-\e^2\over 4\e^3}~.
\eeq
It follows from \eqn{eq308}, \eqn{eq3011}--\eqn{eq3013} and \cite[Theorem~2]{BR} that
\beql{eq3014}
B_n^{[h]}-B_n^{[h-1]}\sim 2A(z)B(z){\e\over 1-\e}(1-\e)^hz^{-n}/\sqrt{2\pi h\si}
\eeq
uniformly for all $h$ and $n$ such that 
\beql{eq3015}
{n\over h}={1+\e\over 2\e}\ \hbox{ for }\  \e\in [\d',~ 1-\d']~,
\eeq
where $\d'$ is any small positive constant.
Noting $B(z)=2/(1+\e)$ and $z=(1-\e^2)/4$, we can simplify \eqn{eq3014} as
\begin{eqnarray*}
& &B_n^{[h]}-B_n^{[h-1]}\\
&\sim& {4\e^2A(\e)\over (1-\e)^2\sqrt{\pi (1+\e)n}} 
\left((1-\e)^{(1-\e)}(1+\e)^{(1+\e)}\right)^{-h/2\e}4^{n}~,
\end{eqnarray*}
which gives Theorem~\ref{th4}.
\clearpage
\input treeref
\end{document}
