{ "cells": [ { "cell_type": "markdown", "source": [ "# Possible pitfalls" ], "metadata": {} }, { "outputs": [], "cell_type": "code", "source": [ "using DataFrames" ], "metadata": {}, "execution_count": 1 }, { "cell_type": "markdown", "source": [ "## Know what is copied when creating a DataFrame" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\u001b[1m x3 \u001b[0m\u001b[1m x4 \u001b[0m\u001b[1m x5 \u001b[0m\n │\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n─────┼────────────────────────────────────────────────────\n 1 │ 0.587516 0.718981 0.62205 0.791086 0.505925\n 2 │ 0.900673 0.0775141 0.210494 0.15541 0.0177345\n 3 │ 0.436133 0.866412 0.603696 0.360206 0.424278", "text/html": [ "
3×5 DataFrame
Rowx1x2x3x4x5
Float64Float64Float64Float64Float64
10.5875160.7189810.622050.7910860.505925
20.9006730.07751410.2104940.155410.0177345
30.4361330.8664120.6036960.3602060.424278
" ] }, "metadata": {}, "execution_count": 2 } ], "cell_type": "code", "source": [ "x = DataFrame(rand(3, 5), :auto)" ], "metadata": {}, "execution_count": 2 }, { "cell_type": "markdown", "source": [ "x and y are not the same object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 3 } ], "cell_type": "code", "source": [ "y = copy(x)\n", "x === y" ], "metadata": {}, "execution_count": 3 }, { "cell_type": "markdown", "source": [ "x and y are not the same object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 4 } ], "cell_type": "code", "source": [ "y = DataFrame(x)\n", "x === y" ], "metadata": {}, "execution_count": 4 }, { "cell_type": "markdown", "source": [ "the columns are also not the same" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 5 } ], "cell_type": "code", "source": [ "any(x[!, i] === y[!, i] for i in 1:ncol(x))" ], "metadata": {}, "execution_count": 5 }, { "cell_type": "markdown", "source": [ "x and y are not the same object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 6 } ], "cell_type": "code", "source": [ "y = DataFrame(x, copycols=false)\n", "x === y" ], "metadata": {}, "execution_count": 6 }, { "cell_type": "markdown", "source": [ "But the columns are the same" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "true" }, "metadata": {}, "execution_count": 7 } ], "cell_type": "code", "source": [ "all(x[!, i] === y[!, i] for i in 1:ncol(x))" ], "metadata": {}, "execution_count": 7 }, { "cell_type": "markdown", "source": [ "the same when creating data frames using `kwarg` syntax" ], "metadata": {} }, { "outputs": [ { "output_type": 
"execute_result", "data": { "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 1\n 2 │ 2 2\n 3 │ 3 3", "text/html": [ "
3×2 DataFrame
Rowxy
Int64Int64
111
222
333
" ] }, "metadata": {}, "execution_count": 8 } ], "cell_type": "code", "source": [ "x = 1:3;\n", "y = [1, 2, 3];\n", "df = DataFrame(x=x, y=y)" ], "metadata": {}, "execution_count": 8 }, { "cell_type": "markdown", "source": [ "different object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 9 } ], "cell_type": "code", "source": [ "y === df.y" ], "metadata": {}, "execution_count": 9 }, { "cell_type": "markdown", "source": [ "range is converted to a vector" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "(UnitRange{Int64}, Vector{Int64})" }, "metadata": {}, "execution_count": 10 } ], "cell_type": "code", "source": [ "typeof(x), typeof(df.x)" ], "metadata": {}, "execution_count": 10 }, { "cell_type": "markdown", "source": [ "slicing rows always creates a copy" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 11 } ], "cell_type": "code", "source": [ "y === df[:, :y]" ], "metadata": {}, "execution_count": 11 }, { "cell_type": "markdown", "source": [ "you can avoid copying by using copycols=false keyword argument in functions." ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m3×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x \u001b[0m\u001b[1m y \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 1\n 2 │ 2 2\n 3 │ 3 3", "text/html": [ "
3×2 DataFrame
Rowxy
Int64Int64
111
222
333
" ] }, "metadata": {}, "execution_count": 12 } ], "cell_type": "code", "source": [ "df = DataFrame(x=x, y=y, copycols=false)" ], "metadata": {}, "execution_count": 12 }, { "cell_type": "markdown", "source": [ "now it is the same" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "true" }, "metadata": {}, "execution_count": 13 } ], "cell_type": "code", "source": [ "y === df.y" ], "metadata": {}, "execution_count": 13 }, { "cell_type": "markdown", "source": [ "not the same object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "false" }, "metadata": {}, "execution_count": 14 } ], "cell_type": "code", "source": [ "select(df, :y)[!, 1] === y" ], "metadata": {}, "execution_count": 14 }, { "cell_type": "markdown", "source": [ "the same object" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "true" }, "metadata": {}, "execution_count": 15 } ], "cell_type": "code", "source": [ "select(df, :y, copycols=false)[!, 1] === y" ], "metadata": {}, "execution_count": 15 }, { "cell_type": "markdown", "source": [ "## Do not modify the parent of `GroupedDataFrame` or view" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "GroupedDataFrame with 2 groups based on key: id\nFirst Group (3 rows): id = 2\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 2 1\n 2 │ 2 3\n 3 │ 1 5\n⋮\nLast Group (3 rows): id = 2\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 2 2\n 2 │ 2 4\n 3 │ 2 6", "text/html": [ "

GroupedDataFrame with 2 groups based on key: id

First Group (3 rows): id = 2
Rowidx
Int64Int64
121
223
315

Last Group (3 rows): id = 2
Rowidx
Int64Int64
122
224
326
" ] }, "metadata": {}, "execution_count": 16 } ], "cell_type": "code", "source": [ "x = DataFrame(id=repeat([1, 2], outer=3), x=1:6)\n", "g = groupby(x, :id)\n", "\n", "x[1:3, 1] = [2, 2, 2]\n", "g ## well - it is wrong now, g is only a view" ], "metadata": {}, "execution_count": 16 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×2 SubDataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 1 5\n 2 │ 2 6", "text/html": [ "
2×2 SubDataFrame
Rowidx
Int64Int64
115
226
" ] }, "metadata": {}, "execution_count": 17 } ], "cell_type": "code", "source": [ "s = view(x, 5:6, :)" ], "metadata": {}, "execution_count": 17 }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×2 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m id \u001b[0m\u001b[1m x \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼──────────────\n 1 │ 2 1\n 2 │ 2 2", "text/html": [ "
2×2 DataFrame
Rowidx
Int64Int64
121
222
" ] }, "metadata": {}, "execution_count": 18 } ], "cell_type": "code", "source": [ "deleteat!(x, 3:6)" ], "metadata": {}, "execution_count": 18 }, { "cell_type": "markdown", "source": [ "This is an error\n", "\n", "```julia\n", "s ## will throw a BoundsError\n", "```" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "## Single column selection for `DataFrame` creates aliases with ! and `getproperty` syntax and copies with :" ], "metadata": {} }, { "outputs": [ { "output_type": "display_data", "data": { "text/plain": "\u001b[1m3×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m a \u001b[0m\u001b[1m b \u001b[0m\u001b[1m c \u001b[0m\u001b[1m d \u001b[0m\u001b[1m e \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────────────────\n 1 │ 1 1 1 1 1\n 2 │ 2 2 2 2 2\n 3 │ 3 3 3 3 3", "text/html": [ "
3×5 DataFrame
Rowabcde
Int64Int64Int64Int64Int64
111111
222222
333333
" ] }, "metadata": {} } ], "cell_type": "code", "source": [ "x = DataFrame(a=1:3)\n", "x.b = x[!, 1] ## alias\n", "x.c = x[:, 1] ## copy\n", "x.d = x[!, 1][:] ## copy\n", "x.e = copy(x[!, 1]) ## explicit copy\n", "display(x)" ], "metadata": {}, "execution_count": 19 }, { "outputs": [ { "output_type": "display_data", "data": { "text/plain": "\u001b[1m3×5 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m a \u001b[0m\u001b[1m b \u001b[0m\u001b[1m c \u001b[0m\u001b[1m d \u001b[0m\u001b[1m e \u001b[0m\n │\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\n─────┼───────────────────────────────────\n 1 │ 100 100 1 1 1\n 2 │ 2 2 2 2 2\n 3 │ 3 3 3 3 3", "text/html": [ "
3×5 DataFrame
Rowabcde
Int64Int64Int64Int64Int64
1100100111
222222
333333
" ] }, "metadata": {} } ], "cell_type": "code", "source": [ "x[1, 1] = 100\n", "display(x)" ], "metadata": {}, "execution_count": 20 }, { "cell_type": "markdown", "source": [ "## When iterating rows of a data frame\n", "\n", "- use `eachrow` to avoid compilation cost (wide tables),\n", "- but `Tables.namedtupleiterator` for fast execution (tall tables)\n", "\n", "this table is wide" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m2×900 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\u001b[1m x3 \u001b[0m\u001b[1m x4 \u001b[0m\u001b[1m x5 \u001b[0m\u001b[1m x6 \u001b[0m\u001b[1m x7 \u001b[0m\u001b[1m x8 \u001b[0m\u001b[1m x9 \u001b[0m\u001b[1m x10 \u001b[0m\u001b[1m x\u001b[0m ⋯\n │\u001b[90m Bool \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Bool \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Char \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m Int64 \u001b[0m\u001b[90m C\u001b[0m ⋯\n─────┼──────────────────────────────────────────────────────────────────────────\n 1 │ false a false 1 1 1 1.0 a 1 1 a ⋯\n 2 │ true b true 2 2 2 2.0 b 2 2 b\n\u001b[36m 890 columns omitted\u001b[0m", "text/html": [ "
2×900 DataFrame
800 columns omitted
Rowx1x2x3x4x5x6x7x8x9x10x11x12x13x14x15x16x17x18x19x20x21x22x23x24x25x26x27x28x29x30x31x32x33x34x35x36x37x38x39x40x41x42x43x44x45x46x47x48x49x50x51x52x53x54x55x56x57x58x59x60x61x62x63x64x65x66x67x68x69x70x71x72x73x74x75x76x77x78x79x80x81x82x83x84x85x86x87x88x89x90x91x92x93x94x95x96x97x98x99x100
BoolCharBoolInt64Int64Int64Float64CharInt64Int64CharFloat64Float64Int64CharInt64CharInt64Int64Float64CharFloat64BoolCharInt64BoolInt64Float64Float64CharFloat64BoolInt64BoolCharInt64Int64Int64CharInt64Float64Int64BoolInt64CharFloat64BoolBoolBoolFloat64CharFloat64CharCharCharBoolFloat64CharCharCharFloat64CharInt64BoolBoolCharBoolFloat64Float64Int64Int64CharFloat64CharBoolInt64Float64Float64Float64Int64Int64CharInt64BoolInt64CharCharFloat64Int64BoolCharFloat64Int64Int64CharBoolFloat64Float64Int64Float64
1falseafalse1111.0a11a1.01.01a1a111.0a1.0falsea1false11.01.0a1.0false1falsea111a11.01false1a1.0falsefalsefalse1.0a1.0aaafalse1.0aaa1.0a1falsefalseafalse1.01.011a1.0afalse11.01.01.011a1false1aa1.01falsea1.011afalse1.01.011.0
2truebtrue2222.0b22b2.02.02b2b222.0b2.0trueb2true22.02.0b2.0true2trueb222b22.02true2b2.0truetruetrue2.0b2.0bbbtrue2.0bbb2.0b2truetruebtrue2.02.022b2.0btrue22.02.02.022b2true2bb2.02trueb2.022btrue2.02.022.0
" ] }, "metadata": {}, "execution_count": 21 } ], "cell_type": "code", "source": [ "df1 = DataFrame([rand([1:2, 'a':'b', false:true, 1.0:2.0]) for i in 1:900], :auto)" ], "metadata": {}, "execution_count": 21 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.041341 seconds (52.71 k allocations: 3.614 MiB, 99.91% compilation time)\n" ] } ], "cell_type": "code", "source": [ "@time collect(eachrow(df1));" ], "metadata": {}, "execution_count": 22 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 7.539398 seconds (885.91 k allocations: 70.446 MiB, 99.68% compilation time)\n" ] } ], "cell_type": "code", "source": [ "@time collect(Tables.namedtupleiterator(df1));" ], "metadata": {}, "execution_count": 23 }, { "cell_type": "markdown", "source": [ "as you can see the time to compile `Tables.namedtupleiterator` is very large in this case, and it would get much worse if the table was wider (that is why we include this tip in pitfalls notebook)" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "the table below is tall" ], "metadata": {} }, { "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "\u001b[1m1000000×10 DataFrame\u001b[0m\n\u001b[1m Row \u001b[0m│\u001b[1m x1 \u001b[0m\u001b[1m x2 \u001b[0m\u001b[1m x3 \u001b[0m\u001b[1m x4 \u001b[0m\u001b[1m x5 \u001b[0m\u001b[1m x6 \u001b[0m\u001b[1m \u001b[0m ⋯\n │\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m \u001b[0m ⋯\n─────────┼──────────────────────────────────────────────────────────────────────\n 1 │ 0.791452 0.297286 0.049207 0.88973 0.309781 0.124727 ⋯\n 2 │ 0.220364 0.701363 0.37604 0.853463 0.292551 0.160833\n 3 │ 0.375985 0.0946538 0.401264 0.259818 0.426253 0.992351\n 4 │ 0.190554 0.799914 0.814485 0.664285 0.243209 0.538027\n 5 │ 0.794052 0.707524 0.982855 0.679856 0.33846 0.377405 ⋯\n 6 │ 0.388176 
0.641699 0.793341 0.455715 0.24483 0.519513\n 7 │ 0.432196 0.0478298 0.691303 0.952995 0.352697 0.359571\n 8 │ 0.796189 0.0284523 0.102914 0.570933 0.207063 0.212477\n ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n 999994 │ 0.165754 0.677366 0.134086 0.644506 0.594977 0.0533597 ⋯\n 999995 │ 0.168486 0.869069 0.0727793 0.0418242 0.226054 0.0179684\n 999996 │ 0.0873745 0.77549 0.224858 0.176993 0.387384 0.100599\n 999997 │ 0.792855 0.471462 0.23574 0.18291 0.235345 0.801913\n 999998 │ 0.82707 0.813067 0.0496293 0.81687 0.0583434 0.825218 ⋯\n 999999 │ 0.805874 0.665029 0.376935 0.849447 0.672471 0.246654\n 1000000 │ 0.357941 0.279164 0.286021 0.229009 0.569281 0.348493\n\u001b[36m 4 columns and 999985 rows omitted\u001b[0m", "text/html": [ "
1000000×10 DataFrame
999975 rows omitted
Rowx1x2x3x4x5x6x7x8x9x10
Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64
10.7914520.2972860.0492070.889730.3097810.1247270.5269940.3767580.9820590.201495
20.2203640.7013630.376040.8534630.2925510.1608330.4036480.8061450.6868960.319294
30.3759850.09465380.4012640.2598180.4262530.9923510.3512960.8970250.6425450.36698
40.1905540.7999140.8144850.6642850.2432090.5380270.2810220.7165380.7352610.0711319
50.7940520.7075240.9828550.6798560.338460.3774050.3937510.781780.449720.205827
60.3881760.6416990.7933410.4557150.244830.5195130.6971980.3715010.7076920.252749
70.4321960.04782980.6913030.9529950.3526970.3595710.5975460.1510980.4137310.00542862
80.7961890.02845230.1029140.5709330.2070630.2124770.5989740.3910490.2906730.416863
90.5143410.5742740.009553440.04261170.8740770.1980390.5041210.8486450.6077430.464585
100.6781130.6012790.307890.0376170.1983170.09060970.07809640.3398570.06572250.0773766
110.2168090.918020.6636820.8131370.9886590.01237060.3229140.796040.3825190.227306
120.9529480.7977730.9197030.9837420.9760590.8186070.290840.6575540.9212090.362167
130.3708190.7770840.3431260.4574760.4119530.5060550.8296120.8944460.1138230.804729
9999890.598120.3416710.7568170.1296770.5580780.1192350.6629590.6198810.9025430.332156
9999900.7957650.5285710.7114050.05771240.9309840.5518590.7036040.2690380.5578760.433196
9999910.5796240.2336880.2153930.3046010.8511720.2043150.09412040.2947330.7458450.504573
9999920.4950980.1659490.4627640.837110.115770.7690740.104810.6307650.1545070.641631
9999930.6955560.3180690.3985150.7409370.8582540.2357510.1846720.03918090.4195160.183319
9999940.1657540.6773660.1340860.6445060.5949770.05335970.6163340.3918970.1613370.967639
9999950.1684860.8690690.07277930.04182420.2260540.01796840.6145550.9110160.2568360.658037
9999960.08737450.775490.2248580.1769930.3873840.1005990.3334150.2834540.2928320.447891
9999970.7928550.4714620.235740.182910.2353450.8019130.5637710.5581610.5081290.595255
9999980.827070.8130670.04962930.816870.05834340.8252180.633720.9029620.8236720.417822
9999990.8058740.6650290.3769350.8494470.6724710.2466540.5684490.5062830.07389840.650809
10000000.3579410.2791640.2860210.2290090.5692810.3484930.5923930.9225440.8256040.48944
" ] }, "metadata": {}, "execution_count": 24 } ], "cell_type": "code", "source": [ "df2 = DataFrame(rand(10^6, 10), :auto)" ], "metadata": {}, "execution_count": 24 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2.127519 seconds (60.08 M allocations: 1.056 GiB, 6.17% gc time, 3.69% compilation time)\n" ] } ], "cell_type": "code", "source": [ "@time map(sum, eachrow(df2));" ], "metadata": {}, "execution_count": 25 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2.119773 seconds (59.99 M allocations: 1.050 GiB, 6.50% gc time)\n" ] } ], "cell_type": "code", "source": [ "@time map(sum, eachrow(df2));" ], "metadata": {}, "execution_count": 26 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.195608 seconds (200.06 k allocations: 21.156 MiB, 24.98% gc time, 96.98% compilation time)\n" ] } ], "cell_type": "code", "source": [ "@time map(sum, Tables.namedtupleiterator(df2));" ], "metadata": {}, "execution_count": 27 }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.005788 seconds (22 allocations: 7.631 MiB)\n" ] } ], "cell_type": "code", "source": [ "@time map(sum, Tables.namedtupleiterator(df2));" ], "metadata": {}, "execution_count": 28 }, { "cell_type": "markdown", "source": [ "as you can see - this time it is much faster to iterate a type stable container\n", "still you might want to use the `select` syntax, which is optimized for such reductions:" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "this includes compilation time" ], "metadata": {} }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.392389 seconds (509.34 k allocations: 41.875 MiB, 98.52% compilation time: 93% of which was recompilation)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": "1000000-element Vector{Float64}:\n 4.549489608990553\n 4.820598260856793\n 4.808170226496026\n 5.054427780801031\n 5.711228368654866\n 5.0724123056568216\n 
4.00439573440345\n 3.61558727488398\n 4.637989455369663\n 2.474878422732406\n ⋮\n 4.377478339806004\n 4.073770679842652\n 4.40725544523935\n 3.836624901881449\n 3.110289532802179\n 4.945542466292135\n 6.168372468024979\n 5.415848010904047\n 4.899889662604407" }, "metadata": {}, "execution_count": 29 } ], "cell_type": "code", "source": [ "@time select(df2, AsTable(:) => ByRow(sum) => \"sum\").sum" ], "metadata": {}, "execution_count": 29 }, { "cell_type": "markdown", "source": [ "Do it again" ], "metadata": {} }, { "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.004722 seconds (125 allocations: 7.635 MiB)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": "1000000-element Vector{Float64}:\n 4.549489608990553\n 4.820598260856793\n 4.808170226496026\n 5.054427780801031\n 5.711228368654866\n 5.0724123056568216\n 4.00439573440345\n 3.61558727488398\n 4.637989455369663\n 2.474878422732406\n ⋮\n 4.377478339806004\n 4.073770679842652\n 4.40725544523935\n 3.836624901881449\n 3.110289532802179\n 4.945542466292135\n 6.168372468024979\n 5.415848010904047\n 4.899889662604407" }, "metadata": {}, "execution_count": 30 } ], "cell_type": "code", "source": [ "@time select(df2, AsTable(:) => ByRow(sum) => \"sum\").sum" ], "metadata": {}, "execution_count": 30 }, { "cell_type": "markdown", "source": [ "---\n", "\n", "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*" ], "metadata": {} } ], "nbformat_minor": 3, "metadata": { "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.10.5" }, "kernelspec": { "name": "julia-1.10", "display_name": "Julia 1.10.5", "language": "julia" } }, "nbformat": 4 }