{ "cells": [ { "cell_type": "markdown", "source": [ "# Joining DataFrames" ], "metadata": {} }, { "outputs": [], "cell_type": "code", "source": [ "using DataFrames" ], "metadata": {}, "execution_count": 1 }, { "cell_type": "markdown", "source": [ "## Preparing DataFrames for a join" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m5×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n 1 │ 1 21\n 2 │ 2 22\n 3 │ 5 23\n 4 │ 6 24\n 5 │\u001b[90m missing \u001b[0m 99", "text/html": [ "
5×2 DataFrame
Rowidage
Int64?Int64
1121
2222
3523
4624
5missing99
" ] }, "metadata": {}, "execution_count": 2 } ], "cell_type": "code", "source": [ "x = DataFrame(ID=[1, 2, 3, 4, missing], name=[\"Alice\", \"Bob\", \"Conor\", \"Dave\", \"Zed\"])\n", "y = DataFrame(id=[1, 2, 5, 6, missing], age=[21, 22, 23, 24, 99])" ], "metadata": {}, "execution_count": 2 }, { "cell_type": "markdown", "source": [ "Rules for the `on` keyword argument:\n", "\n", "+ a single `Symbol` or string if joining on one column with the same name, e.g. `on=:id`\n", "+ a `Pair` of `Symbol`s or string if joining on one column with different names, e.g. `on=:id=>:id2`\n", "+ a vector of `Symbol`s or strings if joining on multiple columns with the same name, e.g. `on=[:id1, :id2]`\n", "+ a vector of `Pair`s of `Symbol`s or strings if joining on multiple columns with different names, e.g. `on=[:a1=>:a2, :b1=>:b2]`\n", "+ a vector containing a combination of `Symbol`s or strings or `Pair` of `Symbol`s or strings, e.g. `on=[:a1=>:a2, :b1]`" ], "metadata": {} }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ArgumentError(\"Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :ID in left data frame.\")" ] } ], "cell_type": "code", "source": [ "try\n", " innerjoin(x, y, on=:ID => :id) ## missing is not allowed to join-on by default\n", "catch e\n", " show(e)\n", "end" ], "metadata": {}, "execution_count": 3 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────────────\n 1 │ 1 Alice 21\n 2 │ 2 Bob 22\n 3 │\u001b[90m missing \u001b[0m Zed 99", "text/html": [ "
3×3 DataFrame
RowIDnameage
Int64?StringInt64
11Alice21
22Bob22
3missingZed99
" ] }, "metadata": {}, "execution_count": 4 } ], "cell_type": "code", "source": [ "innerjoin(x, y, on=:ID => :id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 4 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m5×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64? \u001b[0m\n─────┼──────────────────────────\n 1 │ 1 Alice 21\n 2 │ 2 Bob 22\n 3 │\u001b[90m missing \u001b[0m Zed 99\n 4 │ 3 Conor \u001b[90m missing \u001b[0m\n 5 │ 4 Dave \u001b[90m missing \u001b[0m", "text/html": [ "
5×3 DataFrame
RowIDnameage
Int64?StringInt64?
11Alice21
22Bob22
3missingZed99
43Conormissing
54Davemissing
" ] }, "metadata": {}, "execution_count": 5 } ], "cell_type": "code", "source": [ "leftjoin(x, y, on=\"ID\" => \"id\", matchmissing=:equal)" ], "metadata": {}, "execution_count": 5 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m5×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────\n 1 │ 1 Alice 21\n 2 │ 2 Bob 22\n 3 │\u001b[90m missing \u001b[0m Zed 99\n 4 │ 5 \u001b[90m missing \u001b[0m 23\n 5 │ 6 \u001b[90m missing \u001b[0m 24", "text/html": [ "
5×3 DataFrame
RowIDnameage
Int64?String?Int64
11Alice21
22Bob22
3missingZed99
45missing23
56missing24
" ] }, "metadata": {}, "execution_count": 6 } ], "cell_type": "code", "source": [ "rightjoin(x, y, on=:ID => :id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 6 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m7×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64? \u001b[0m\n─────┼───────────────────────────\n 1 │ 1 Alice 21\n 2 │ 2 Bob 22\n 3 │\u001b[90m missing \u001b[0m Zed 99\n 4 │ 3 Conor \u001b[90m missing \u001b[0m\n 5 │ 4 Dave \u001b[90m missing \u001b[0m\n 6 │ 5 \u001b[90m missing \u001b[0m 23\n 7 │ 6 \u001b[90m missing \u001b[0m 24", "text/html": [ "
7×3 DataFrame
RowIDnameage
Int64?String?Int64?
11Alice21
22Bob22
3missingZed99
43Conormissing
54Davemissing
65missing23
76missing24
" ] }, "metadata": {}, "execution_count": 7 } ], "cell_type": "code", "source": [ "outerjoin(x, y, on=:ID => :id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 7 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\n─────┼─────────────────\n 1 │ 1 Alice\n 2 │ 2 Bob\n 3 │\u001b[90m missing \u001b[0m Zed", "text/html": [ "
3×2 DataFrame
RowIDname
Int64?String
11Alice
22Bob
3missingZed
" ] }, "metadata": {}, "execution_count": 8 } ], "cell_type": "code", "source": [ "semijoin(x, y, on=:ID => :id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 8 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m ID \u001b[0m\u001b[1m name \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\n─────┼────────────────\n 1 │ 3 Conor\n 2 │ 4 Dave", "text/html": [ "
2×2 DataFrame
RowIDname
Int64?String
13Conor
24Dave
" ] }, "metadata": {}, "execution_count": 9 } ], "cell_type": "code", "source": [ "antijoin(x, y, on=:ID => :id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 9 }, { "cell_type": "markdown", "source": [ "## Cross join\n", "(here no `on` argument)" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m String \u001b[0m\n─────┼───────────────\n 1 │ 1 a\n 2 │ 1 b\n 3 │ 1 c\n 4 │ 2 a\n 5 │ 2 b\n 6 │ 2 c", "text/html": [ "
6×2 DataFrame
Rowxy
Int64String
11a
21b
31c
42a
52b
62c
" ] }, "metadata": {}, "execution_count": 10 } ], "cell_type": "code", "source": [ "crossjoin(DataFrame(x=[1, 2]), DataFrame(y=[\"a\", \"b\", \"c\"]))" ], "metadata": {}, "execution_count": 10 }, { "cell_type": "markdown", "source": [ "## Complex cases of joins" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m6×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1 \u001b[0m\u001b[1m id2 \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────\n 1 │ 1 11 21\n 2 │ 1 1 22\n 3 │ 3 31 23\n 4 │ 3 3 24\n 5 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m 99\n 6 │\u001b[90m missing \u001b[0m 999 100", "text/html": [ "
6×3 DataFrame
Rowid1id2age
Int64?Int64?Int64
111121
21122
333123
43324
5missingmissing99
6missing999100
" ] }, "metadata": {}, "execution_count": 11 } ], "cell_type": "code", "source": [ "x = DataFrame(id1=[1, 1, 2, 2, missing, missing],\n", " id2=[1, 11, 2, 21, missing, 99],\n", " name=[\"Alice\", \"Bob\", \"Conor\", \"Dave\", \"Zed\", \"Zoe\"]\n", ")\n", "y = DataFrame(id1=[1, 1, 3, 3, missing, missing],\n", " id2=[11, 1, 31, 3, missing, 999],\n", " age=[21, 22, 23, 24, 99, 100]\n", ")" ], "metadata": {}, "execution_count": 11 }, { "cell_type": "markdown", "source": [ "joining on two columns" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×4 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1 \u001b[0m\u001b[1m id2 \u001b[0m\u001b[1m name \u001b[0m\u001b[1m age \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼─────────────────────────────────\n 1 │ 1 11 Bob 21\n 2 │ 1 1 Alice 22\n 3 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed 99", "text/html": [ "
3×4 DataFrame
Rowid1id2nameage
Int64?Int64?StringInt64
1111Bob21
211Alice22
3missingmissingZed99
" ] }, "metadata": {}, "execution_count": 12 } ], "cell_type": "code", "source": [ "innerjoin(x, y, on=[:id1, :id2], matchmissing=:equal)" ], "metadata": {}, "execution_count": 12 }, { "cell_type": "markdown", "source": [ "with duplicates all combinations are produced" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m12×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1 \u001b[0m\u001b[1m id2 \u001b[0m\u001b[1m name \u001b[0m\u001b[1m id2_1 \u001b[0m\u001b[1m age \u001b[0m\u001b[1m source \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64? \u001b[0m\u001b[90m String? \u001b[0m\u001b[90m Int64? \u001b[0m\u001b[90m Int64? \u001b[0m\u001b[90m String \u001b[0m\n─────┼─────────────────────────────────────────────────────────\n 1 │ 1 1 Alice 11 21 both\n 2 │ 1 11 Bob 11 21 both\n 3 │ 1 1 Alice 1 22 both\n 4 │ 1 11 Bob 1 22 both\n 5 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed \u001b[90m missing \u001b[0m 99 both\n 6 │\u001b[90m missing \u001b[0m 99 Zoe \u001b[90m missing \u001b[0m 99 both\n 7 │\u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m Zed 999 100 both\n 8 │\u001b[90m missing \u001b[0m 99 Zoe 999 100 both\n 9 │ 2 2 Conor \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m left_only\n 10 │ 2 21 Dave \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m left_only\n 11 │ 3 \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m 31 23 right_only\n 12 │ 3 \u001b[90m missing \u001b[0m\u001b[90m missing \u001b[0m 3 24 right_only", "text/html": [ "
12×6 DataFrame
Rowid1id2nameid2_1agesource
Int64?Int64?String?Int64?Int64?String
111Alice1121both
2111Bob1121both
311Alice122both
4111Bob122both
5missingmissingZedmissing99both
6missing99Zoemissing99both
7missingmissingZed999100both
8missing99Zoe999100both
922Conormissingmissingleft_only
10221Davemissingmissingleft_only
113missingmissing3123right_only
123missingmissing324right_only
" ] }, "metadata": {}, "execution_count": 13 } ], "cell_type": "code", "source": [ "outerjoin(x, y, on=:id1, makeunique=true, indicator=:source, matchmissing=:equal)" ], "metadata": {}, "execution_count": 13 }, { "cell_type": "markdown", "source": [ "you can force validation of uniqueness of key on which you join" ], "metadata": {} }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ArgumentError(\"Merge key(s) are not unique in both df1 and df2. df1 contains 3 duplicate keys: @NamedTuple{id1::Union{Missing, Int64}}((1,)), ..., @NamedTuple{id1::Union{Missing, Int64}}((missing,)). df2 contains 3 duplicate keys: @NamedTuple{id1::Union{Missing, Int64}}((1,)), ..., @NamedTuple{id1::Union{Missing, Int64}}((missing,)).\")" ] } ], "cell_type": "code", "source": [ "try\n", " innerjoin(x, y, on=:id1, makeunique=true, validate=(true, true), matchmissing=:equal)\n", "catch e\n", " show(e)\n", "end" ], "metadata": {}, "execution_count": 14 }, { "cell_type": "markdown", "source": [ "mixed `on` argument for joining on multiple columns" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×4 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id1 \u001b[0m\u001b[1m id2 \u001b[0m\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Char \u001b[0m\n─────┼──────────────────────────\n 1 │ 1 1 a a\n 2 │ 2 2 b b", "text/html": [ "
2×4 DataFrame
Rowid1id2x1x2
Int64Int64CharChar
111aa
222bb
" ] }, "metadata": {}, "execution_count": 15 } ], "cell_type": "code", "source": [ "x = DataFrame(id1=1:6, id2=[1, 2, 1, 2, 1, 2], x1='a':'f')\n", "y = DataFrame(id1=1:6, ID2=1:6, x2='a':'f')\n", "innerjoin(x, y, on=[:id1, :id2 => :ID2])" ], "metadata": {}, "execution_count": 15 }, { "cell_type": "markdown", "source": [ "joining more than two data frames" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "5-element Vector{DataFrames.DataFrame}:\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v1 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 11\n 2 │ 2 12\n 3 │ 3 13\n 4 │ 4 14\n 5 │ 5 15\n 6 │ 6 16\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v2 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 21\n 2 │ 2 22\n 3 │ 3 23\n 4 │ 4 24\n 5 │ 5 25\n 6 │ 6 26\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v3 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 31\n 2 │ 2 32\n 3 │ 3 33\n 4 │ 4 34\n 5 │ 5 35\n 6 │ 6 36\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v4 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 41\n 2 │ 2 42\n 3 │ 3 43\n 4 │ 4 44\n 5 │ 5 45\n 6 │ 6 46\n \u001b[1m6×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v5 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 51\n 2 │ 2 52\n 3 │ 3 53\n 4 │ 4 54\n 5 │ 5 55\n 6 │ 6 56" }, "metadata": {}, "execution_count": 16 } ], "cell_type": "code", "source": [ "xs = [DataFrame(\"id\" => 1:6, \"v$i\" => ((1:6) .+ 10i)) for i in 1:5]" ], "metadata": {}, "execution_count": 16 }, { "cell_type": "markdown", "source": [ "`innerjoin` as an example, it also works for `outerjoin` and `crossjoin`" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m6×6 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m v1 \u001b[0m\u001b[1m v2 \u001b[0m\u001b[1m v3 \u001b[0m\u001b[1m v4 \u001b[0m\u001b[1m v5 \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────────────────────────────────\n 1 │ 1 11 21 31 41 51\n 2 │ 2 12 22 32 42 52\n 3 │ 3 13 23 33 43 53\n 4 │ 4 14 24 34 44 54\n 5 │ 5 15 25 35 45 55\n 6 │ 6 16 26 36 46 56", "text/html": [ "
6×6 DataFrame
Rowidv1v2v3v4v5
Int64Int64Int64Int64Int64Int64
111121314151
221222324252
331323334353
441424344454
551525354555
661626364656
" ] }, "metadata": {}, "execution_count": 17 } ], "cell_type": "code", "source": [ "innerjoin(xs..., on=:id)" ], "metadata": {}, "execution_count": 17 }, { "cell_type": "markdown", "source": [ "## matchmissing keyword argument\n", "\n", "In general you have three options how `missing` values are handled in joins that are handled by `matchmisssing` kewyowrd argument value as follows:\n", "\n", "+ `:error`: throw an error if missings are encountered (this is the default)\n", "+ `:equal`: assume `misssing` values are equal to themselves\n", "+ `:notequal`: assume `misssing` values are not equal to themselves (not available for `outerjoin`)\n", "\n", "Here are some examples comparing the options:" ], "metadata": {} }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ArgumentError(\"Missing values in key columns are not allowed when matchmissing == :error. `missing` found in column :id in left data frame.\")" ] } ], "cell_type": "code", "source": [ "df1 = DataFrame(id=[1, 2, missing], x=1:3)\n", "df2 = DataFrame(id=[1, missing, 3], y=1:3)\n", "\n", "try\n", " innerjoin(df1, df2, on=:id)\n", "catch e\n", " show(e)\n", "end" ], "metadata": {}, "execution_count": 18 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────\n 1 │ 1 1 1\n 2 │\u001b[90m missing \u001b[0m 3 2", "text/html": [ "
2×3 DataFrame
Rowidxy
Int64?Int64Int64
1111
2missing32
" ] }, "metadata": {}, "execution_count": 19 } ], "cell_type": "code", "source": [ "innerjoin(df1, df2, on=:id, matchmissing=:equal)" ], "metadata": {}, "execution_count": 19 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m1×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────────────\n 1 │ 1 1 1", "text/html": [ "
1×3 DataFrame
Rowidxy
Int64?Int64Int64
1111
" ] }, "metadata": {}, "execution_count": 20 } ], "cell_type": "code", "source": [ "innerjoin(df1, df2, on=:id, matchmissing=:notequal)" ], "metadata": {}, "execution_count": 20 }, { "cell_type": "markdown", "source": [ "Since DataFrames.jl 1.3 you can do an efficient left join of two data frames in-place. This means that the left data frame gets updated with new columns, but the columns that exist in it are not affected. This operation requires that there are no duplicates of keys in the right data frame that match keys in left data frame:" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n 1 │ 1 1\n 2 │ 2 2\n 3 │\u001b[90m missing \u001b[0m 3", "text/html": [ "
3×2 DataFrame
Rowidx
Int64?Int64
111
222
3missing3
" ] }, "metadata": {}, "execution_count": 21 } ], "cell_type": "code", "source": [ "df1" ], "metadata": {}, "execution_count": 21 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼────────────────\n 1 │ 1 1\n 2 │\u001b[90m missing \u001b[0m 2\n 3 │ 3 3", "text/html": [ "
3×2 DataFrame
Rowidy
Int64?Int64
111
2missing2
333
" ] }, "metadata": {}, "execution_count": 22 } ], "cell_type": "code", "source": [ "df2" ], "metadata": {}, "execution_count": 22 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64? \u001b[0m\n─────┼─────────────────────────\n 1 │ 1 1 1\n 2 │ 2 2 \u001b[90m missing \u001b[0m\n 3 │\u001b[90m missing \u001b[0m 3 \u001b[90m missing \u001b[0m", "text/html": [ "
3×3 DataFrame
Rowidxy
Int64?Int64Int64?
1111
222missing
3missing3missing
" ] }, "metadata": {}, "execution_count": 23 } ], "cell_type": "code", "source": [ "leftjoin!(df1, df2, on=:id, matchmissing=:notequal)" ], "metadata": {}, "execution_count": 23 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×3 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64? \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64? \u001b[0m\n─────┼─────────────────────────\n 1 │ 1 1 1\n 2 │ 2 2 \u001b[90m missing \u001b[0m\n 3 │\u001b[90m missing \u001b[0m 3 \u001b[90m missing \u001b[0m", "text/html": [ "
3×3 DataFrame
Rowidxy
Int64?Int64Int64?
1111
222missing
3missing3missing
" ] }, "metadata": {}, "execution_count": 24 } ], "cell_type": "code", "source": [ "df1" ], "metadata": {}, "execution_count": 24 }, { "cell_type": "markdown", "source": [ "---\n", "\n", "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*" ], "metadata": {} } ], "nbformat_minor": 3, "metadata": { "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.10.5" }, "kernelspec": { "name": "julia-1.10", "display_name": "Julia 1.10.5", "language": "julia" } }, "nbformat": 4 }