Files
cmdla/Lessons/11-10/lesson.ipynb
2024-07-30 14:43:25 +02:00

315 lines
6.9 KiB
Plaintext
Generated
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "d735264a-f7aa-491b-b804-aa7adf93ea53",
"metadata": {},
"outputs": [],
"source": [
"using LinearAlgebra, Plots, DelimitedFiles, Random"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c66f5d41-623c-4e7c-a56f-db00dea5dcb7",
"metadata": {},
"outputs": [],
"source": [
"# Read the CSV (skipping its first line), drop the first column and store the\n",
"# remaining entries as integers.\n",
"M = Matrix{Int}(readdlm(\"../09-29/salaries.csv\", ',', skipstart=1)[:, 2:end])\n",
"# A holds the first three columns of M.\n",
"A = M[:, 1:3];"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e41b86f2-4c79-4d8e-8b18-2939d3e94fe8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"398×4 Matrix{Int64}:\n",
" 101 132 491 460\n",
" 507 153 719 1073\n",
" 15 10 18 23\n",
" 111 77 132 166\n",
" 596 163 1249 1682\n",
" 301 117 562 746\n",
" 499 171 839 1167\n",
" 27 25 65 67\n",
" 38 15 29 52\n",
" 109 71 412 450\n",
" 594 230 703 1067\n",
" 269 134 352 487\n",
" 216 192 832 856\n",
" ⋮ \n",
" 75 25 59 109\n",
" 345 174 1153 1324\n",
" 146 101 395 440\n",
" 21 8 25 38\n",
" 174 82 586 678\n",
" 248 173 549 624\n",
" 238 179 977 1036\n",
" 476 147 1178 1507\n",
" 352 146 463 669\n",
" 122 47 182 257\n",
" 228 193 1150 1185\n",
" 529 146 1040 1423"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Append a fourth column that is an exact linear combination of the first three\n",
"# (col1 - col2 + col3), which makes the matrix rank-deficient.\n",
"# The columns are taken from M rather than from A itself, so re-running this cell\n",
"# always yields a 4-column matrix instead of appending one more column each time.\n",
"A = hcat(M[:, 1:3], M[:, 1] .- M[:, 2] .+ M[:, 3])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "474db68f-18a1-4465-86d2-8858b5e673f0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 26031.994273985612\n",
" 3136.8196381998196\n",
" 1113.9150221554164\n",
" 1.2532368455541195e-12"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Factorize A and pull the U, S and V fields out of the SVD object by name.\n",
"(; U, S, V) = svd(A)\n",
"S # the last singular value is very close to 0"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "996fde91-0545-4c13-b698-f98695f4f9e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 0.0002891389119208513\n",
" 1113.9150221554394\n",
" 3136.8196381998237\n",
" 26031.994273985627"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Singular values recovered as the square roots of the eigenvalues of A'A.\n",
"# The smallest eigenvalue is zero in exact arithmetic, so rounding may push it\n",
"# slightly below zero and make sqrt throw a DomainError; clamp at 0 first.\n",
"sqrt.(max.(eigvals(A' * A), 0)) # no help: the tiny singular value is lost to numerical error"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bd0558d1-a325-453e-b11c-33fb8fa08179",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -4.70433529065995e17\n",
" 4.7043352906598285e17\n",
" -4.704335290659996e17\n",
" 4.704335290660089e17"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Right-hand side: the fourth column of M.\n",
"y = M[:, 4]\n",
"\n",
"# Naive solve through the explicit inverse of the singular-value matrix.\n",
"x = V * inv(diagm(S)) * U' * y # huge entries :("
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8e337ee5-c9d9-4c36-a186-d6f1f249fc99",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -6312.126276909091\n",
" 1760.8043960629386\n",
" -17914.38753206213\n",
" -9.408670581319933e17"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Solve the diagonal system directly instead of forming an inverse.\n",
"# Diagonal(S) avoids materialising a dense matrix; the near-zero last singular\n",
"# value still dominates the result.\n",
"Diagonal(S) \\ (U' * y) # same problem"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "174145da-2232-4b8c-a41f-fdbfb6d1d983",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" -7381.269979698599\n",
" -5911.720495010535\n",
" -11150.23174887978\n",
" 20666.47182920933"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Normal equations. A'A is numerically singular as well, but here the solve\n",
"# returns moderate-looking numbers, so the ill-conditioning is easy to miss.\n",
"(A' * A) \\ (A' * y) # same problem but not apparent here"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "07671bb0-0d01-4a9f-a075-522fbd0f691e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Real}:\n",
" 3.841426782270476e-5\n",
" 0.0003187942296146447\n",
" 0.0008977345489649724\n",
" 0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Truncate the SVD: invert only the singular values above the cutoff and use\n",
"# zero in place of the reciprocal of the rest.\n",
"# zero(s) keeps both branches the same type, so Ss is a concrete Vector{Float64}\n",
"# rather than the Vector{Real} obtained by mixing 1/x with the integer 0.\n",
"Ss = [s > 1e-10 ? 1 / s : zero(s) for s in S]\n",
"# very pragmatic"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "294319d8-71ce-412d-8c09-ff2a032e2b52",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Any}:\n",
" 940.2932859956918\n",
" -14233.28376070477\n",
" -2828.6684831854627\n",
" 12344.908563515011"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 940.2932859956919\n",
" -14233.28376070477\n",
" -2828.6684831854627\n",
" 12344.908563515011"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Truncated-SVD solution: scale the coefficients U'y by the truncated inverse\n",
"# singular values. Element-wise scaling replaces the dense diagm(...) products and\n",
"# gives a Vector{Float64} even when Ss has an abstract element type.\n",
"x_reg = V * (Ss .* (U' * y))\n",
"x_reg |> display\n",
"\n",
"# Equivalent formulation: keep only the first three singular triplets.\n",
"x_reg = V[:, 1:3] * ((U[:, 1:3]' * y) ./ S[1:3])\n",
"x_reg |> display\n",
"\n",
"# Much better, but the choice of cutoff is a bit arbitrary."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "96678996-a2b6-4f96-be9b-b5107015c237",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Vector{Float64}:\n",
" 26031.99442670313\n",
" 3136.8195355612684\n",
" 1113.914857400499\n",
" 0.001975927891687143"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Perturb A with small Gaussian noise. The generator is seeded so that the\n",
"# displayed values are reproducible under Restart & Run All.\n",
"Random.seed!(42)\n",
"E = 1e-4 * randn(size(A))\n",
"AA = A .+ E\n",
"\n",
"# Only the singular values are needed here. Using a separate name keeps the\n",
"# U, S, V factors of the unperturbed A intact for the cells above.\n",
"S_noisy = svdvals(AA)\n",
"\n",
"display(S_noisy) # perturbed by noise, the smallest singular value became much larger"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.9.3",
"language": "julia",
"name": "julia-1.9"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.9.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}