-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchapter-1.tex
303 lines (232 loc) · 16.6 KB
/
chapter-1.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
\chapter{[the lone chapter]}
\begin{conv}
Unless stated otherwise, assume the following:
\begin{convlist}
\item $K\in\{\mathbb R, \mathbb C\}$.
\item $X$ will denote a generic set.
\item Vector spaces will be over $K$.
\item Evaluation functions on any subset of $K^X$ will be denoted by $\delta_x$'s. These are clearly linear maps in case the domain is a subspace of $K^X$.
\item $\mathscr H$ will denote a Hilbert space over $K$.
\item Abusing the notation slightly, the same notation will be used to denote the restriction to $\mathbb{R\to R}$ of $\Re$, $\Im$ and complex conjugation.
\item Whenever $\iprd{\cdot, \cdot}$ is semi-inner-product on a vector space,\footnote{
That is, it's almost a norm except not possibly satisfying the positive definiteness.
} we'll use the usual $\norm\cdot$ to denote the induced seminorm.\footnote{Note that a semi-inner-product obeys the Cauchy-Schwarz inequality (just follow Schwarz's proof using the quadratic polynomial).}
\item For any function $k\colon X\times X\to K$, we'll use $k_x$ to stand for $k(\cdot, x)\colon X\to K$.
\end{convlist}
\end{conv}
\section{[the first section]}
\begin{dfn}[p.s.d.\ kernels]
A positive semi-definite kernel on $X$ is a function $k\colon X\times X\to K$ that is
\begin{mylist}
\item \emph{conjugate symmetric}, \ie, $k(y, x) = \overline{k(x, y)}$; and,
\item \emph{positive semi-definite}, \ie, for any $x_1, \ldots, x_n\in X$ and any $\alpha_1, \ldots, \alpha_n\in K$, we have
\[
\sum_{i, j = 1}^n \overline{\alpha_i}\, k(x_i, x_j)\, \alpha_j\ge 0\text.
\]
\end{mylist}
\end{dfn}
\begin{dfn}[r.k.'s and RKHS's]
Let $\mathscr H$ be a vector subspace of $K^X$ and $k\colon X\times X\to K$ be such that for any $x\in X$, we have that $k_x\in\mathscr H$ and that it obeys the \emph{reproducing property}, \ie,\footnote{
Equivalently, $f(x) = \iprd{f, k_x}$ for any $f\in\mathscr H$.
}
\[
\delta_x = \iprd{\cdot, k_x}\text.
\]
Then we say that ``$\mathscr H$ is a \emph{reproducing kernel Hilbert space} with a \emph{reproducing kernel} $k$''.
\end{dfn}
\begin{rmk}
As is usual, we'll use the following terminology:
\begin{mylist}
\item ``$\mathscr H$ is an RKHS of functions on $X$'' iff $\mathscr H$ is a vector subspace of $K^X$ and there exists a $k\colon X\times X\to K$ such that $\mathscr H$ becomes an RKHS with a reproducing kernel $k$.
\item ``$k$ is a reproducing kernel on $X$'' iff there exists a Hilbert space $\mathscr H$ which is a vector subspace of $K^X$ such that $\mathscr H$ becomes an RKHS with a reproducing kernel $k$.
\end{mylist}
\end{rmk}
\begin{cor}[Immediate consequences]\label{COR: immedicate conseqs of RKHS's and rk's}
\leavevmode
\begin{mylist}
\item\label{CORi: immedicate conseqs of RKHS's and rk's} A reproducing kernel is a p.s.d.\ kernel.
\item\label{CORii: immedicate conseqs of RKHS's and rk's} $\mathscr H\subseteq K^X$ is an RKHS $\iff$ evaluation functionals on it are continuous.
\item Convergence in an RKHS $\implies$ pointwise convergence.\myMargin{
What about the converse?
}
\item\label{CORiv: immedicate conseqs of RKHS's and rk's} An RKHS's reproducing kernel is unique.
\end{mylist}
\end{cor}
\begin{proof}
\begin{mylist}
\item Let $k$ be a reproducing kernel of $\mathscr H\subseteq K^X$.
\begin{prooflist}
\item Conjugate symmetry: $k(y, x) = k_x(y) = \iprd{k_x, k_y} = \overline{\iprd{k_y, k_x}} = \overline{k_y(x)} = \overline{k(x, y)}$.
\item Positive semi-definite: Let $x_1, \ldots, x_n\in X$ and $\alpha_1, \cdots, \alpha_n\in K$. Then
\begin{align*}
\sum\nolimits_{i, j} \overline{\alpha_i}\, k(x_i, x_j)\, \alpha_j
& = \sum\nolimits_{i, j} \overline{\alpha_i}\, \iprd{ k_{x_j}, k_{x_i}\, } \alpha_j\\
& = \sum\nolimits_{i, j} \iprd{\alpha_j k_{x_j}, \alpha_i k_{x_i}}\\
& = \left\Vert \sum\nolimits_i \alpha_i k_{x_i} \right\Vert^2\\
& \ge 0\text.
\end{align*}
\end{prooflist}
\item If $k$ is a reproducing kernel for $\mathscr H$, then $\delta_x = \iprd{\cdot, k_x}$, which is continuous. Conversely, if the evaluations are continuous, then by Riesz, we can define $k\colon X\times X\to K$ such that $k_x\in\mathscr H$ and $\delta_x = \iprd{\cdot, k_x}$.
\item Follows from \ref{CORii: immedicate conseqs of RKHS's and rk's}.
\item Suppose $k$ and $k'$ are reproducing kernels for $\mathscr H\subseteq K^X$. Then for any $f\in \mathscr H$ and $x\in X$, we have $\iprd{f, k_x - k'_x} = f(x) - f(x) = 0$ so that $k_x = k'_x$. Since $x$ was arbitrary, $k = k'$.\qedhere
\end{mylist}
\end{proof}
\begin{rmk}
The converse of \ref{CORi: immedicate conseqs of RKHS's and rk's} is the content of \myRef{THM: Moore-Aronszajn}.
\end{rmk}
\begin{thm}[RKHS $\mapsto$ r.k.\ is injective]\label{THM: RKHS --> rk is injective}
Distinct RKHS's have distinct reproducing kernels.
\end{thm}
\begin{proof}
Let $k$ be the reproducing kernel for $\mathscr H\subseteq K^X$. It suffices to show that $\mathscr H$ is uniquely determined (along with its inner product\footnote{
Note that addition and scalar multiplication are already determined (namely, pointwise) by definition.
}) by $k$. Let $\mathscr H_0$ be the subspace of $\mathscr H$ spanned by $k_x$'s for $x\in X$. Note that $\mathscr H_0^\perp = \{0\}$ (because of $k$'s reproducing property) so that $\clos{\mathscr H_0} = (\mathscr H_0^\perp)^\perp = \mathscr H$.\footnote{
A naïve glance suggests that this fixes $\mathscr H$ as a set, but it \emph{doesn't}!---at least yet. We haven't yet reduced the description of $\mathscr H$ to only that of $k$. The closure of $\mathscr H_0$ (which \emph{does} only depend on $k$) is dependent on the norm topology induced from $\mathscr H$, which might still depend on the choice of $\mathscr H$, not just $k$.
} This determines $\mathscr H$ as a set, for a general \elt of $\mathscr H$ is a limit of the Cauchy sequence in $\mathscr H_0$, which is just its pointwise limit (which is existent due to \myRef{CORii: immedicate conseqs of RKHS's and rk's} of \myRef{COR: immedicate conseqs of RKHS's and rk's}). All that now remains is to determine is the norm on $\mathscr H$, which is determined once it's determined on its dense subset $\mathscr H_0$. Note that this will also determine the inner product on $\mathscr H_0$ (due to polarization).
Indeed, for $f = \sum_{i = 1}^n \alpha_i k_{x_i}$, we have
\begin{align*}
\norm{f}^2
& = \sum_{i, j = 1}^n \alpha_i \overline{\alpha_j} \iprd{k_{x_i}, k_{x_j}}\\
& = \sum_{i, j = 1}^n \overline{\alpha_j}\, k(x_j, x_i)\, \alpha_i\text.\tag*{\qedhere}
\end{align*}
\end{proof}
\begin{rmk}
Note that completeness of $\mathscr H$ was used in writing $\clos{\mathscr H_0} = (\mathscr H_0^\perp)^\perp$.\myMargin{
Any example to show the necessity of this?
} \sout{Other than that and in , it was not used elsewhere until now.} See \myRef{CORii: immedicate conseqs of RKHS's and rk's}.
\end{rmk}
We summarize our results so far:
\[
\begin{tikzcd}[column sep=huge]
\left\{ \begin{gathered}
\text{RKHS's of}\\
\text{functions on $X$}
\end{gathered} \right\}\ar[r, leftrightarrow, "\text{\myRef{THM: RKHS --> rk is injective}}", "\text{\myRef{COR: immedicate conseqs of RKHS's and rk's} \ref{CORiv: immedicate conseqs of RKHS's and rk's}}"']
& \{ \text{r.k.'s on $X$} \}\ar[r, hookrightarrow, "\text{\myRef{COR: immedicate conseqs of RKHS's and rk's} \ref{CORi: immedicate conseqs of RKHS's and rk's}}", "\text{(inclusion)}"']
& \left\{ \begin{gathered}
\text{p.s.d.\ kernels}\\
\text{on $X$}
\end{gathered} \right\}
\end{tikzcd}
\]
To complete the circle of ideas, we show in the next section that any p.s.d.\ kernel is a reproducing kernel for some (and hence unique) RKHS which, among other things, will lead the inclusion above to become equality.
\section{Moore-Aronszajn}
The upcoming lemmas are geared towards the following goal: Given a p.s.d.\ kernel $k$ on $X$, we find an RKHS $\mathscr H$ whose reproducing kernel is precisely $k$. We do so in the following steps:
\begin{mylist}
\item Each $k_x$ must lie in $\mathscr H$. Thus, we are motivated to first define a vector space $\mathscr H_0$ spanned by $k_x$'s.
\item We show that there's a unique inner product on $\mathscr H_0$ \wrt which $k$ has the reproducing property.
\item Finally, we complete $\mathscr H_0$, and verify that it's the required RKHS.
\end{mylist}
\begin{lem}\label{LEM: space H_0}
Let $k$ be a p.s.d.\ kernel on $X$. Define $\mathscr H_0$ to be the subspace of $K^X$ generated by $k_x$'s for $x\in X$. Then $\mathscr H_0$ admits a unique inner product \wrt which $k$ has the reproducing property.
\end{lem}
\begin{proof}
We show that
\[
\iprd{f, g}
= \sum\nolimits_{i, j} \overline{\beta_j}\, k(y_j, x_i)\,\alpha_i\addtag\label{EQN: inner prod on H_0}
\]
defines an inner product on $\mathscr H_0$ for $f = \sum_{i = 1}^m\alpha_i k_{x_i}$ and $g = \sum_{j = 1}^n \beta_j k_{y_j}$. That it's well-defined follows because
\[
\sum\nolimits_j \overline{\beta_j} f(y_j)
= \sum\nolimits_{i, j} \overline{\beta_j}\, k(y_j, x_i)\,\alpha_i
= \sum\nolimits_i \overline{g(x_i)} \alpha_i\addtag\label{EQN: inner prod on H_0 well-def}
\]
where the second equality follows since \uline{$k$ is conjugate symmetric}.
It's immediate from \myRef{EQN: inner prod on H_0} that $\iprd{\cdot, \cdot}$ is p.s.d.\ and conjugate symmetric (since \uline{$k$ is a p.s.d.\ kernel}), and from \myRef{EQN: inner prod on H_0 well-def} that it's bilinear with $\iprd{f, k_x} = f(x)$ for any $x\in X$ (take $g = k_x$). Only positive definiteness remains to be shown:
\begin{subproof}
Note that $\iprd{\cdot, \cdot}$ is a semi-inner-product so that it obeys Cauchy-Schwarz and induces a seminorm. Now, let $\norm f = 0$. Then for any $x\in X$, we have $\abs{f(x)} = \abs{\iprd{f, k_x}} \le \norm f\norm{k_x} = 0$.\qedhere
\end{subproof}
\end{proof}
\begin{lem}
Continuing \myRef{LEM: space H_0}, let\footnote{
``$S$'' for ``sequences''.
} $S := \{\text{Cauchy sequences in $\mathscr H_0$}\}$. Then there exists a linear map $\phi\colon S\to K^X$ that maps Cauchy sequences to their pointwise limits, the kernel of which consists precisely of sequences that converge to $0$ in $\mathscr H_0$.
\end{lem}
\begin{proof}
First we show that $\phi$ is indeed well-defined:
\begin{subproof}
Let $(f_n)$ be Cauchy in $\mathscr H_0$. Then for any $x\in X$, we have $\abs{ f_m(x) - f_n(x) } = \abs{\iprd{f_m - f_n, k_x}}\le \norm{f_n - f_m}\norm{k_x} \which\to 0$ as $m, n\to \infty$ so that $(f_n(x))$ is Cauchy in $K$ and hence \cgt. Thus, pointwise limits of Cauchy sequences in $\mathscr H_0$ do exist.
\end{subproof}
Linearity of $\phi$ is easy. We now compute $\ker\phi$. If $f_n\to 0$ in $\mathscr H_0$, then for any $x\in X$, we have $f_n(x) = \iprd{f_n, k_x}\which\to 0$. Conversely, let $(f_n)$ be a Cauchy sequence in $\mathscr H_0$ that converges to $0$ pointwise. We show that it converges to $0$ in $\mathscr H_0$ as well:
\begin{subproof}
Fix an $N$ and write $f_N = \sum_i \alpha_i k_{x_i}$ for finitely many $i$'s. Now,
\begin{align*}
\norm{f_n}^2
& = \bigl| \iprd{f_n - f_N, f_n} + \iprd{f_N, f_n}\bigr| \\
& \le \bigl| \iprd{f_n - f_N, f_n} \bigr| + \biggl|\sum_i \alpha_i \overline{f_n(x_i)}\biggr|\\
& \le \norm{f_n - f_N}\norm{f_n} + \sum_i \abs{\alpha_i}\abs{f_n(x_i)}
\end{align*}
so that taking $N$ large enough ensures that the above is eventually less than any arbitrary $\epsilon > 0$.\qedhere
\end{subproof}
\end{proof}
Thus, we have the following commutative diagram:
\[
\begin{tikzcd}
& S\ar[d]\ar[dr, "\phi|"] & \\
\mathscr H_0\ar[ur]\ar[r, "\iota"'] & S/\ker\phi\ar[r, "\tilde\phi"'] & \im\phi
\end{tikzcd}
\]
The map $\mathscr H_0\to S$ represents the function $f\mapsto (f, f, \ldots)$.
Now we make our final arguments:
\begin{mylist}
\item $\iota\colon f\mapsto \overline{(f, f, \ldots)}$ is a metric completion with the usual metric on $S/\ker\phi$, namely $d(\overline{(f_i)}, \overline{(g_i)}) = \lim_i d(f_i, g_i)\which = \lim_i\norm{f_i - g_i}$.
\item Thus, the metric space $S/\ker\phi$ admits a (unique) Hilbert space structure such that $\iota$ becomes a norm completion with the norm recovering the metric.
\item The vector space structure thus endowed on $S/\ker\phi$ is precisely the one due to the algebraic quotient:
\begin{subproof}
Note that any two generic \elt{s} of $S/\ker\phi$ are given by $\overline{(f_i)}$ and $\overline{(g_i)}$ where $(f_i)$, $(g_i)$ are Cauchy sequences in $\mathscr H_0$. Note that $\iota(f_n)\stackrel{n}{\to} \overline{(f_i)}$ and $\iota(g_n)\stackrel{n}{\to}\overline{(g_i)}$ (easy). Continuity of addition and linearity of $\iota$ ensure that $\iota(f_n + g_n)\stackrel{n}{\to} \overline{(f_i)} + \overline{(g_i)}$. Finally, note that $\iota(f_n + g_n)\stackrel{n}{\to}\overline{(f_i + g_i)}$ as well so that we indeed have $\overline{(f_i)} + \overline{(g_i)} = \overline{(f_i + g_i)}$, which is precisely the definition of vector addition in the algebraic quotient. Similarly, one can verify for scalar multiplication.
\end{subproof}
\item $\tilde\phi$ is a vector space \iso.\footnote{
\Wrt the algebraic vector space structure on $S/\ker\phi$, not neceassrily the vector space structure coming from completion. Thus, it was crucial to show that these two structures are exactly the same.
} Thus, the inner product on $S/\ker\phi$ can be transported to $\im\phi$ without altering the latter's vector space structure, making $\tilde\phi$ an isometric \iso.
\item Note that $\tilde\phi\circ\iota$ is precisely the inclusion $\mathscr H_0\hookrightarrow\im\phi$ (just traverse along the top arrows in the commutative diagram above) which is thus an isometric linear map.
\item $\im\phi$ is complete since $S/\ker\phi$ is, and thus is a Hilbert space.
\item Finally, we show that $k$ still has the reproducing property on $\im\phi$:
\begin{subproof}
Let $f\in\im\phi$ be the pointwise limit of the Cauchy sequence $(f_i)$ in $\mathscr H_0$. Then $f_i\to f$ in $\im\phi$ as well:
\begin{subproof}
Note that $f = \tilde\phi(\, \overline{(f_i)} \,)$ and $f_j = \tilde\phi\circ\iota(f_j)$. Thus it suffices to have $\iota(f_j)\stackrel{j}{\to} \overline{(f_i)}$ in $S/\ker\phi$ which is indeed true.
\end{subproof}
Thus, for any $x\in X$, one has $\iprd{f, k_x} = \lim_i \iprd{f_i, k_x} = \lim_i f_i(x) = f(x)$ as claimed.
\end{subproof}
\end{mylist}
We have thus constructed a Hilbert space, namely $\im\phi$, whose reproducing kernel is precisely $k$, proving the following:
\begin{thm}[Moore-Aronszajn]\label{THM: Moore-Aronszajn}
Any p.s.d.\ kernel is a reproducing kernel.
\end{thm}
\section{A Toy Example}
Here, we look at the case when $X$ is finite. \Wlogg, let $X = \{1, \ldots, n\}$ for $n\in\mathbb N$. Now, any p.s.d.\ kernel $k$ on $X$ is simply a p.s.d.\ and conjugate symmetric $n\times n$ matrix. Here, our $\mathscr H_0$ (in the language of \myRef{LEM: space H_0}) is the column space $\col(k)$ of $k$. Under the inner product defined by \myRef{EQN: inner prod on H_0}, it will be the complete (being finite dimensional) and thus itself be the required the RKHS. In this case, \myRef{EQN: inner prod on H_0} becomes
\begin{align*}
\iprd{k\alpha, k\beta}
& = \sum_{i, j = 1}^n \alpha_i k(j, i)\overline{\beta_j}\\
& = \sum_{i, j = 1}^n \alpha_i \overline{k(i, j)\beta_j}\\
& = \iprd{\alpha, k\beta}_e
\end{align*}
where $\iprd{\cdot, \cdot}_e$ denotes the usual Euclidean inner product on $K^n$.
Let $V$ be a finite dimensional subspace of $K^X$ endowed with an inner product. We investigate whether this is an RKHS. (Note that it's already Hilbert.) Suppose it indeed is with the reproducing kernel being $k$. Let $f_1, \ldots, f_n$ form an orthonormal basis for $V$. Then we must have
\begin{align*}
k_x
& = \sum_{i = 1}^n \iprd{k_x, f_i} f_i\\
& = \sum_{i = 1}^n \overline{f_i(x)} f_i
\intertext{yielding}
k(x, y) & = \sum_{i = 1}^n f_i(x)\overline{f_i(y)}\addtag\label{EQN: k in terms of orthonormal basis}\text.
\end{align*}
Now, it's straightforward to check that $k$ given by \myRef{EQN: k in terms of orthonormal basis} indeed is a reproducing kernel for $V$:
\begin{prooflist}
\item Firstly, note that each $k_x\in V$ being a linear combination of $f_i$'s.
\item Secondly, for $g\in V$, we have $\iprd{g, k_x} = \sum_i f_i(x)\iprd{g, f_i} = \bigl(\sum_i\iprd{g, f_i} f_i\bigr)(x) = g(x)$.
\end{prooflist}
\[
k(x, y) =
\begin{cases}
\frac{1 - (x\overline y)^{n+1}}{1 - x\overline y}, & x\overline y\ne 1\\
n + 1, & \text{otherwise}
\end{cases}
\]
\[
\begin{tikzcd}[row sep=huge]
\mathscr H_0\ar[d, "\iota"']\ar[dr] & \\
S/\ker\phi\ar[d, "\tilde\phi"'] & S\ar[l]\ar[dl, "\phi|"]\\
\im\phi &
\end{tikzcd}
\]