Files
cmdla/11-10/lesson.ipynb

315 lines
6.9 KiB
Plaintext
Raw Normal View History

2023-11-15 11:23:16 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d735264a-f7aa-491b-b804-aa7adf93ea53",
"metadata": {},
"outputs": [],
"source": [
"using LinearAlgebra, Plots, DelimitedFiles"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c66f5d41-623c-4e7c-a56f-db00dea5dcb7",
"metadata": {},
"outputs": [],
"source": [
"M = convert(Matrix{Int}, readdlm(\"../09-29/salaries.csv\", ',', skipstart=1)[:, 2:end])\n",
"A = M[1:end, 1:3];"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e41b86f2-4c79-4d8e-8b18-2939d3e94fe8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"398×4 Matrix{Int64}:\n",
" 101 132 491 460\n",
" 507 153 719 1073\n",
" 15 10 18 23\n",
" 111 77 132 166\n",
" 596 163 1249 1682\n",
" 301 117 562 746\n",
" 499 171 839 1167\n",
" 27 25 65 67\n",
" 38 15 29 52\n",
" 109 71 412 450\n",
" 594 230 703 1067\n",
" 269 134 352 487\n",
" 216 192 832 856\n",
" ⋮ \n",
" 75 25 59 109\n",
" 345 174 1153 1324\n",
" 146 101 395 440\n",
" 21 8 25 38\n",
" 174 82 586 678\n",
" 248 173 549 624\n",
" 238 179 977 1036\n",
" 476 147 1178 1507\n",
" 352 146 463 669\n",
" 122 47 182 257\n",
" 228 193 1150 1185\n",
" 529 146 1040 1423"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A = hcat(A, A[:, 1] .- A[:, 2] .+ A[:, 3]) # new column!"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "474db68f-18a1-4465-86d2-8858b5e673f0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 26031.994273985612\n",
" 3136.8196381998196\n",
" 1113.9150221554164\n",
" 1.2532368455541195e-12"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"U, S, V = svd(A)\n",
"S # last eigen very close to 0"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "996fde91-0545-4c13-b698-f98695f4f9e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 0.0002891389119208513\n",
" 1113.9150221554394\n",
" 3136.8196381998237\n",
" 26031.994273985627"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sqrt.(eigvals(A' * A)) # no help because numerical error"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bd0558d1-a325-453e-b11c-33fb8fa08179",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -4.70433529065995e17\n",
" 4.7043352906598285e17\n",
" -4.704335290659996e17\n",
" 4.704335290660089e17"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y = M[1:end, 4]\n",
"\n",
"x = V * inv(diagm(S)) * U' * y # large values :("
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8e337ee5-c9d9-4c36-a186-d6f1f249fc99",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -6312.126276909091\n",
" 1760.8043960629386\n",
" -17914.38753206213\n",
" -9.408670581319933e17"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"diagm(S) \\ (U' * y) # same problem"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "174145da-2232-4b8c-a41f-fdbfb6d1d983",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -7381.269979698599\n",
" -5911.720495010535\n",
" -11150.23174887978\n",
" 20666.47182920933"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(A' * A) \\ (A' * y) # same problem but not apparent here"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "07671bb0-0d01-4a9f-a075-522fbd0f691e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Real}:\n",
" 3.841426782270476e-5\n",
" 0.0003187942296146447\n",
" 0.0008977345489649724\n",
" 0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# truncate the svd?\n",
"Ss = (x -> if x > 1e-10 1/x else 0 end).(S)\n",
"# very pragmatic"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "294319d8-71ce-412d-8c09-ff2a032e2b52",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Any}:\n",
" 940.2932859956918\n",
" -14233.28376070477\n",
" -2828.6684831854627\n",
" 12344.908563515011"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 940.2932859956919\n",
" -14233.28376070477\n",
" -2828.6684831854627\n",
" 12344.908563515011"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"x_reg = V * diagm(Ss) * (U' * y)\n",
"x_reg |> display\n",
"\n",
"# the same as\n",
"x_reg = V[:, 1:3] * (diagm(S[1:3]) \\ (U[:, 1:3]' * y))\n",
"x_reg |> display\n",
"\n",
"# much better but a bit arbitrary"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "96678996-a2b6-4f96-be9b-b5107015c237",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 26031.99442670313\n",
" 3136.8195355612684\n",
" 1113.914857400499\n",
" 0.001975927891687143"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"E = 1e-4 * randn(size(A))\n",
"AA = A .+ E\n",
"U, S, V = svd(AA)\n",
"\n",
"display(S) # perturbed by noise, the small value became much larger"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.3",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}