{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Joining DataFrames"
   ],
   "metadata": {}
  },
  {
   "outputs": [],
   "cell_type": "code",
   "source": [
    "using DataFrames"
   ],
   "metadata": {},
   "execution_count": 1
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Preparing DataFrames for a join"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m5×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m age   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n   1 │       1     21\n   2 │       2     22\n   3 │       5     23\n   4 │       6     24\n   5 │\u001b[90m missing \u001b[0m    99",
      "text/html": [
       "<div><div style = \"float: left;\"><span>5×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">5</td><td style = \"text-align: right;\">23</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">6</td><td style = \"text-align: right;\">24</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 2
    }
   ],
   "cell_type": "code",
   "source": [
    "x = DataFrame(ID=[1, 2, 3, 4, missing], name=[\"Alice\", \"Bob\", \"Conor\", \"Dave\", \"Zed\"])\n",
    "y = DataFrame(id=[1, 2, 5, 6, missing], age=[21, 22, 23, 24, 99])"
   ],
   "metadata": {},
   "execution_count": 2
  },
  {
   "cell_type": "markdown",
   "source": [
    "Rules for the `on` keyword argument:\n",
    "\n",
    "+ a single `Symbol` or string if joining on one column with the same name, e.g. `on=:id`\n",
    "+ a `Pair` of `Symbol`s or string if joining on one column with different names, e.g. `on=:id=>:id2`\n",
    "+ a vector of `Symbol`s or strings if joining on multiple columns with the same name, e.g. `on=[:id1, :id2]`\n",
    "+ a vector of `Pair`s of `Symbol`s or strings if joining on multiple columns with different names, e.g. `on=[:a1=>:a2, :b1=>:b2]`\n",
    "+ a vector containing a combination of `Symbol`s or strings or `Pair` of `Symbol`s or strings, e.g. `on=[:a1=>:a2, :b1]`"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ArgumentError(\"Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :ID in left data frame.\")"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "try\n",
    "    innerjoin(x, y, on=:ID => :id) ## missing is not allowed to join-on by default\n",
    "catch e\n",
    "    show(e)\n",
    "end"
   ],
   "metadata": {},
   "execution_count": 3
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID      \u001b[0m\u001b[1m name   \u001b[0m\u001b[1m age   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────────────\n   1 │       1  Alice      21\n   2 │       2  Bob        22\n   3 │\u001b[90m missing \u001b[0m Zed        99",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">99</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "cell_type": "code",
   "source": [
    "innerjoin(x, y, on=:ID => :id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 4
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m5×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID      \u001b[0m\u001b[1m name   \u001b[0m\u001b[1m age     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64?  \u001b[0m\n─────┼──────────────────────────\n   1 │       1  Alice        21\n   2 │       2  Bob          22\n   3 │\u001b[90m missing \u001b[0m Zed          99\n   4 │       3  Conor  \u001b[90m missing \u001b[0m\n   5 │       4  Dave   \u001b[90m missing \u001b[0m",
      "text/html": [
       "<div><div style = \"float: left;\"><span>5×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">99</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">3</td><td style = \"text-align: left;\">Conor</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"text-align: right;\">4</td><td style = \"text-align: left;\">Dave</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "cell_type": "code",
   "source": [
    "leftjoin(x, y, on=\"ID\" => \"id\", matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 5
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m5×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID      \u001b[0m\u001b[1m name    \u001b[0m\u001b[1m age   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────\n   1 │       1  Alice       21\n   2 │       2  Bob         22\n   3 │\u001b[90m missing \u001b[0m Zed         99\n   4 │       5 \u001b[90m missing \u001b[0m    23\n   5 │       6 \u001b[90m missing \u001b[0m    24",
      "text/html": [
       "<div><div style = \"float: left;\"><span>5×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, String}\" style = \"text-align: left;\">String?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">99</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">5</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">23</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"text-align: right;\">6</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">24</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "cell_type": "code",
   "source": [
    "rightjoin(x, y, on=:ID => :id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 6
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m7×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID      \u001b[0m\u001b[1m name    \u001b[0m\u001b[1m age     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64?  \u001b[0m\n─────┼───────────────────────────\n   1 │       1  Alice         21\n   2 │       2  Bob           22\n   3 │\u001b[90m missing \u001b[0m Zed           99\n   4 │       3  Conor   \u001b[90m missing \u001b[0m\n   5 │       4  Dave    \u001b[90m missing \u001b[0m\n   6 │       5 \u001b[90m missing \u001b[0m      23\n   7 │       6 \u001b[90m missing \u001b[0m      24",
      "text/html": [
       "<div><div style = \"float: left;\"><span>7×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, String}\" style = \"text-align: left;\">String?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">99</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">3</td><td style = \"text-align: left;\">Conor</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"text-align: right;\">4</td><td style = \"text-align: left;\">Dave</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">6</td><td style = \"text-align: right;\">5</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">23</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">7</td><td style = \"text-align: right;\">6</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">24</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 7
    }
   ],
   "cell_type": "code",
   "source": [
    "outerjoin(x, y, on=:ID => :id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 7
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID      \u001b[0m\u001b[1m name   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m String \u001b[0m\n─────┼─────────────────\n   1 │       1  Alice\n   2 │       2  Bob\n   3 │\u001b[90m missing \u001b[0m Zed",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Bob</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 8
    }
   ],
   "cell_type": "code",
   "source": [
    "semijoin(x, y, on=:ID => :id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 8
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m2×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID     \u001b[0m\u001b[1m name   \u001b[0m\n     │\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\n─────┼────────────────\n   1 │      3  Conor\n   2 │      4  Dave",
      "text/html": [
       "<div><div style = \"float: left;\"><span>2×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">ID</th><th style = \"text-align: left;\">name</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">3</td><td style = \"text-align: left;\">Conor</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">4</td><td style = \"text-align: left;\">Dave</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 9
    }
   ],
   "cell_type": "code",
   "source": [
    "antijoin(x, y, on=:ID => :id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 9
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Cross join\n",
    "(here no `on` argument)"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x     \u001b[0m\u001b[1m y      \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m String \u001b[0m\n─────┼───────────────\n   1 │     1  a\n   2 │     1  b\n   3 │     1  c\n   4 │     2  a\n   5 │     2  b\n   6 │     2  c",
      "text/html": [
       "<div><div style = \"float: left;\"><span>6×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">x</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"String\" style = \"text-align: left;\">String</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">a</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">b</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">c</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">a</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">b</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">6</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">c</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "cell_type": "code",
   "source": [
    "crossjoin(DataFrame(x=[1, 2]), DataFrame(y=[\"a\", \"b\", \"c\"]))"
   ],
   "metadata": {},
   "execution_count": 10
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Complex cases of joins"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m6×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1     \u001b[0m\u001b[1m id2     \u001b[0m\u001b[1m age   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────\n   1 │       1       11     21\n   2 │       1        1     22\n   3 │       3       31     23\n   4 │       3        3     24\n   5 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m    99\n   6 │\u001b[90m missing \u001b[0m     999    100",
      "text/html": [
       "<div><div style = \"float: left;\"><span>6×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id1</th><th style = \"text-align: left;\">id2</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">11</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">31</td><td style = \"text-align: right;\">23</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">24</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">6</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">999</td><td style = \"text-align: right;\">100</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 11
    }
   ],
   "cell_type": "code",
   "source": [
    "x = DataFrame(id1=[1, 1, 2, 2, missing, missing],\n",
    "    id2=[1, 11, 2, 21, missing, 99],\n",
    "    name=[\"Alice\", \"Bob\", \"Conor\", \"Dave\", \"Zed\", \"Zoe\"]\n",
    ")\n",
    "y = DataFrame(id1=[1, 1, 3, 3, missing, missing],\n",
    "    id2=[11, 1, 31, 3, missing, 999],\n",
    "    age=[21, 22, 23, 24, 99, 100]\n",
    ")"
   ],
   "metadata": {},
   "execution_count": 11
  },
  {
   "cell_type": "markdown",
   "source": [
    "joining on two columns"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×4 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1     \u001b[0m\u001b[1m id2     \u001b[0m\u001b[1m name   \u001b[0m\u001b[1m age   \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64?  \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────────────\n   1 │       1       11  Bob        21\n   2 │       1        1  Alice      22\n   3 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed        99",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×4 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id1</th><th style = \"text-align: left;\">id2</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">age</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">11</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">21</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">22</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">99</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 12
    }
   ],
   "cell_type": "code",
   "source": [
    "innerjoin(x, y, on=[:id1, :id2], matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 12
  },
  {
   "cell_type": "markdown",
   "source": [
    "with duplicates all combinations are produced"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m12×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1     \u001b[0m\u001b[1m id2     \u001b[0m\u001b[1m name    \u001b[0m\u001b[1m id2_1   \u001b[0m\u001b[1m age     \u001b[0m\u001b[1m source     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64?  \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64?  \u001b[0m\u001b[90m Int64?  \u001b[0m\u001b[90m String     \u001b[0m\n─────┼─────────────────────────────────────────────────────────\n   1 │       1        1  Alice         11       21  both\n   2 │       1       11  Bob           11       21  both\n   3 │       1        1  Alice          1       22  both\n   4 │       1       11  Bob            1       22  both\n   5 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed     \u001b[90m missing \u001b[0m      99  both\n   6 │\u001b[90m missing \u001b[0m      99  Zoe     \u001b[90m missing \u001b[0m      99  both\n   7 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed          999      100  both\n   8 │\u001b[90m missing \u001b[0m      99  Zoe          999      100  both\n   9 │       2        2  Conor   \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m left_only\n  10 │       2       21  Dave    \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m left_only\n  11 │       3 \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m      31       23  right_only\n  12 │       3 \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m       3       24  right_only",
      "text/html": [
       "<div><div style = \"float: left;\"><span>12×6 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id1</th><th style = \"text-align: left;\">id2</th><th style = \"text-align: left;\">name</th><th style = \"text-align: left;\">id2_1</th><th style = \"text-align: left;\">age</th><th style = \"text-align: left;\">source</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, String}\" style = \"text-align: left;\">String?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"String\" style = \"text-align: left;\">String</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">11</td><td style = \"text-align: right;\">21</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">11</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">11</td><td style = \"text-align: right;\">21</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">Alice</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">22</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">11</td><td style = \"text-align: left;\">Bob</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">22</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">6</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td><td style = \"text-align: left;\">Zoe</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">7</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">Zed</td><td style = \"text-align: right;\">999</td><td style = \"text-align: right;\">100</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">8</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">99</td><td style = \"text-align: left;\">Zoe</td><td style = \"text-align: right;\">999</td><td style = \"text-align: right;\">100</td><td style = \"text-align: left;\">both</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">9</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">Conor</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">left_only</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">10</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">21</td><td style = \"text-align: left;\">Dave</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: left;\">left_only</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">11</td><td style = \"text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">31</td><td style = \"text-align: right;\">23</td><td style = \"text-align: left;\">right_only</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">12</td><td style = \"text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"font-style: italic; text-align: left;\">missing</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">24</td><td style = \"text-align: left;\">right_only</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 13
    }
   ],
   "cell_type": "code",
   "source": [
    "outerjoin(x, y, on=:id1, makeunique=true, indicator=:source, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 13
  },
  {
   "cell_type": "markdown",
   "source": [
    "you can force validation of uniqueness of key on which you join"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ArgumentError(\"Merge key(s) are not unique in both df1 and df2. df1 contains 3 duplicate keys: @NamedTuple{id1::Union{Missing, Int64}}((1,)), ..., @NamedTuple{id1::Union{Missing, Int64}}((missing,)). df2 contains 3 duplicate keys: @NamedTuple{id1::Union{Missing, Int64}}((1,)), ..., @NamedTuple{id1::Union{Missing, Int64}}((missing,)).\")"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "try\n",
    "    innerjoin(x, y, on=:id1, makeunique=true, validate=(true, true), matchmissing=:equal)\n",
    "catch e\n",
    "    show(e)\n",
    "end"
   ],
   "metadata": {},
   "execution_count": 14
  },
  {
   "cell_type": "markdown",
   "source": [
    "mixed `on` argument for joining on multiple columns"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m2×4 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1   \u001b[0m\u001b[1m id2   \u001b[0m\u001b[1m x1   \u001b[0m\u001b[1m x2   \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Char \u001b[0m\n─────┼──────────────────────────\n   1 │     1      1  a     a\n   2 │     2      2  b     b",
      "text/html": [
       "<div><div style = \"float: left;\"><span>2×4 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id1</th><th style = \"text-align: left;\">id2</th><th style = \"text-align: left;\">x1</th><th style = \"text-align: left;\">x2</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Char\" style = \"text-align: left;\">Char</th><th title = \"Char\" style = \"text-align: left;\">Char</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: left;\">a</td><td style = \"text-align: left;\">a</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: left;\">b</td><td style = \"text-align: left;\">b</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 15
    }
   ],
   "cell_type": "code",
   "source": [
    "x = DataFrame(id1=1:6, id2=[1, 2, 1, 2, 1, 2], x1='a':'f')\n",
    "y = DataFrame(id1=1:6, ID2=1:6, x2='a':'f')\n",
    "innerjoin(x, y, on=[:id1, :id2 => :ID2])"
   ],
   "metadata": {},
   "execution_count": 15
  },
  {
   "cell_type": "markdown",
   "source": [
    "joining more than two data frames"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "5-element Vector{DataFrames.DataFrame}:\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v1    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n   1 │     1     11\n   2 │     2     12\n   3 │     3     13\n   4 │     4     14\n   5 │     5     15\n   6 │     6     16\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v2    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n   1 │     1     21\n   2 │     2     22\n   3 │     3     23\n   4 │     4     24\n   5 │     5     25\n   6 │     6     26\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v3    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n   1 │     1     31\n   2 │     2     32\n   3 │     3     33\n   4 │     4     34\n   5 │     5     35\n   6 │     6     36\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v4    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n   1 │     1     41\n   2 │     2     42\n   3 │     3     43\n   4 │     4     44\n   5 │     5     45\n   6 │     6     46\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v5    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n   1 │     1     51\n   2 │     2     52\n   3 │     3     53\n   4 │     4     54\n   5 │     5     55\n   6 │     6     56"
     },
     "metadata": {},
     "execution_count": 16
    }
   ],
   "cell_type": "code",
   "source": [
    "xs = [DataFrame(\"id\" => 1:6, \"v$i\" => ((1:6) .+ 10i)) for i in 1:5]"
   ],
   "metadata": {},
   "execution_count": 16
  },
  {
   "cell_type": "markdown",
   "source": [
    "`innerjoin` as an example, it also works for `outerjoin` and `crossjoin`"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m6×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id    \u001b[0m\u001b[1m v1    \u001b[0m\u001b[1m v2    \u001b[0m\u001b[1m v3    \u001b[0m\u001b[1m v4    \u001b[0m\u001b[1m v5    \u001b[0m\n     │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────────────────────────────────\n   1 │     1     11     21     31     41     51\n   2 │     2     12     22     32     42     52\n   3 │     3     13     23     33     43     53\n   4 │     4     14     24     34     44     54\n   5 │     5     15     25     35     45     55\n   6 │     6     16     26     36     46     56",
      "text/html": [
       "<div><div style = \"float: left;\"><span>6×6 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">v1</th><th style = \"text-align: left;\">v2</th><th style = \"text-align: left;\">v3</th><th style = \"text-align: left;\">v4</th><th style = \"text-align: left;\">v5</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">11</td><td style = \"text-align: right;\">21</td><td style = \"text-align: right;\">31</td><td style = \"text-align: right;\">41</td><td style = \"text-align: right;\">51</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">12</td><td style = \"text-align: right;\">22</td><td style = \"text-align: right;\">32</td><td style = \"text-align: right;\">42</td><td style = \"text-align: right;\">52</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">13</td><td style = \"text-align: right;\">23</td><td style = \"text-align: right;\">33</td><td style = \"text-align: right;\">43</td><td style = \"text-align: right;\">53</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">4</td><td style = \"text-align: right;\">4</td><td style = \"text-align: right;\">14</td><td style = \"text-align: right;\">24</td><td style = \"text-align: right;\">34</td><td style = \"text-align: right;\">44</td><td style = \"text-align: right;\">54</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">5</td><td style = \"text-align: right;\">5</td><td style = \"text-align: right;\">15</td><td style = \"text-align: right;\">25</td><td style = \"text-align: right;\">35</td><td style = \"text-align: right;\">45</td><td style = \"text-align: right;\">55</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">6</td><td style = \"text-align: right;\">6</td><td style = \"text-align: right;\">16</td><td style = \"text-align: right;\">26</td><td style = \"text-align: right;\">36</td><td style = \"text-align: right;\">46</td><td style = \"text-align: right;\">56</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 17
    }
   ],
   "cell_type": "code",
   "source": [
    "innerjoin(xs..., on=:id)"
   ],
   "metadata": {},
   "execution_count": 17
  },
  {
   "cell_type": "markdown",
   "source": [
    "## matchmissing keyword argument\n",
    "\n",
    "In general you have three options how `missing` values are handled in joins that are handled by `matchmisssing` kewyowrd argument value as follows:\n",
    "\n",
    "+ `:error`: throw an error if missings are encountered (this is the default)\n",
    "+ `:equal`: assume `misssing` values are equal to themselves\n",
    "+ `:notequal`: assume `misssing` values are not equal to themselves (not available for `outerjoin`)\n",
    "\n",
    "Here are some examples comparing the options:"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ArgumentError(\"Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :id in left data frame.\")"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "df1 = DataFrame(id=[1, 2, missing], x=1:3)\n",
    "df2 = DataFrame(id=[1, missing, 3], y=1:3)\n",
    "\n",
    "try\n",
    "    innerjoin(df1, df2, on=:id)\n",
    "catch e\n",
    "    show(e)\n",
    "end"
   ],
   "metadata": {},
   "execution_count": 18
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m2×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m x     \u001b[0m\u001b[1m y     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────\n   1 │       1      1      1\n   2 │\u001b[90m missing \u001b[0m     3      2",
      "text/html": [
       "<div><div style = \"float: left;\"><span>2×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">x</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">2</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 19
    }
   ],
   "cell_type": "code",
   "source": [
    "innerjoin(df1, df2, on=:id, matchmissing=:equal)"
   ],
   "metadata": {},
   "execution_count": 19
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m1×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id     \u001b[0m\u001b[1m x     \u001b[0m\u001b[1m y     \u001b[0m\n     │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────────────\n   1 │      1      1      1",
      "text/html": [
       "<div><div style = \"float: left;\"><span>1×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">x</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 20
    }
   ],
   "cell_type": "code",
   "source": [
    "innerjoin(df1, df2, on=:id, matchmissing=:notequal)"
   ],
   "metadata": {},
   "execution_count": 20
  },
  {
   "cell_type": "markdown",
   "source": [
    "Since DataFrames.jl 1.3 you can do an efficient left join of two data frames in-place. This means that the left data frame gets updated with new columns, but the columns that exist in it are not affected. This operation requires that there are no duplicates of keys in the right data frame that match keys in left data frame:"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m x     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n   1 │       1      1\n   2 │       2      2\n   3 │\u001b[90m missing \u001b[0m     3",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">x</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">2</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">3</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 21
    }
   ],
   "cell_type": "code",
   "source": [
    "df1"
   ],
   "metadata": {},
   "execution_count": 21
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m y     \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n   1 │       1      1\n   2 │\u001b[90m missing \u001b[0m     2\n   3 │       3      3",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×2 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">2</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"text-align: right;\">3</td><td style = \"text-align: right;\">3</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 22
    }
   ],
   "cell_type": "code",
   "source": [
    "df2"
   ],
   "metadata": {},
   "execution_count": 22
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m x     \u001b[0m\u001b[1m y       \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64?  \u001b[0m\n─────┼─────────────────────────\n   1 │       1      1        1\n   2 │       2      2 \u001b[90m missing \u001b[0m\n   3 │\u001b[90m missing \u001b[0m     3 \u001b[90m missing \u001b[0m",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">x</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 23
    }
   ],
   "cell_type": "code",
   "source": [
    "leftjoin!(df1, df2, on=:id, matchmissing=:notequal)"
   ],
   "metadata": {},
   "execution_count": 23
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id      \u001b[0m\u001b[1m x     \u001b[0m\u001b[1m y       \u001b[0m\n     │\u001b[90m Int64?  \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64?  \u001b[0m\n─────┼─────────────────────────\n   1 │       1      1        1\n   2 │       2      2 \u001b[90m missing \u001b[0m\n   3 │\u001b[90m missing \u001b[0m     3 \u001b[90m missing \u001b[0m",
      "text/html": [
       "<div><div style = \"float: left;\"><span>3×3 DataFrame</span></div><div style = \"clear: both;\"></div></div><div class = \"data-frame\" style = \"overflow-x: scroll;\"><table class = \"data-frame\" style = \"margin-bottom: 6px;\"><thead><tr class = \"header\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">Row</th><th style = \"text-align: left;\">id</th><th style = \"text-align: left;\">x</th><th style = \"text-align: left;\">y</th></tr><tr class = \"subheader headerLastRow\"><th class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\"></th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th><th title = \"Int64\" style = \"text-align: left;\">Int64</th><th title = \"Union{Missing, Int64}\" style = \"text-align: left;\">Int64?</th></tr></thead><tbody><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td><td style = \"text-align: right;\">1</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"text-align: right;\">2</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr><tr><td class = \"rowNumber\" style = \"font-weight: bold; text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td><td style = \"text-align: right;\">3</td><td style = \"font-style: italic; text-align: right;\">missing</td></tr></tbody></table></div>"
      ]
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "cell_type": "code",
   "source": [
    "df1"
   ],
   "metadata": {},
   "execution_count": 24
  },
  {
   "cell_type": "markdown",
   "source": [
    "---\n",
    "\n",
    "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
   ],
   "metadata": {}
  }
 ],
 "nbformat_minor": 3,
 "metadata": {
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.10.5"
  },
  "kernelspec": {
   "name": "julia-1.10",
   "display_name": "Julia 1.10.5",
   "language": "julia"
  }
 },
 "nbformat": 4
}