{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-truncated-svd",
   "metadata": {},
   "source": [
    "# Least squares with a rank-deficient matrix: naive vs. truncated SVD\n",
    "\n",
    "**Summary.** The matrix `A` is given a fourth column that is an exact linear combination of the first three (`col1 - col2 + col3`), so it has rank 3 and its smallest singular value is zero up to rounding (~1e-12). Dividing by that singular value makes the pseudo-inverse solution explode (~1e17). Replacing the inverse of every singular value below a tolerance by zero (truncated SVD) yields a solution of moderate size. Finally, a small random perturbation of `A` shows how strongly the smallest singular value reacts to noise.\n",
    "\n",
    "The outputs of the truncated-SVD and noise cells are not stored in this file; run the notebook top to bottom (*Restart Kernel → Run All*) to produce them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d735264a-f7aa-491b-b804-aa7adf93ea53",
   "metadata": {},
   "outputs": [],
   "source": [
    "using LinearAlgebra, Plots, DelimitedFiles, Random\n",
    "\n",
    "# Configuration: everything a reader might want to tune.\n",
    "# DATA_PATH   - comma-separated input file; its first line and first column are skipped on load\n",
    "# SV_TOL      - singular values at or below this threshold are treated as zero\n",
    "# NOISE_SCALE - standard deviation of the Gaussian perturbation added to A\n",
    "# SEED        - RNG seed, so that the noise experiment is reproducible\n",
    "DATA_PATH = \"../09-29/salaries.csv\"\n",
    "SV_TOL = 1e-10\n",
    "NOISE_SCALE = 1e-4\n",
    "SEED = 42;"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-data",
   "metadata": {},
   "source": [
    "## Load data and build a rank-deficient matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c66f5d41-623c-4e7c-a56f-db00dea5dcb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Skip the first line of the file (presumably a header), drop the first column and convert the rest to Int.\n",
    "M = convert(Matrix{Int}, readdlm(DATA_PATH, ',', skipstart=1)[:, 2:end])\n",
    "\n",
    "# Columns 1:3 are used to build A below; column 4 is the right-hand side y.\n",
    "@assert size(M, 2) >= 4 \"expected at least 4 numeric columns, got $(size(M, 2))\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e41b86f2-4c79-4d8e-8b18-2939d3e94fe8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "398×4 Matrix{Int64}:\n",
       " 101 132 491 460\n",
       " 507 153 719 1073\n",
       " 15 10 18 23\n",
       " 111 77 132 166\n",
       " 596 163 1249 1682\n",
       " 301 117 562 746\n",
       " 499 171 839 1167\n",
       " 27 25 65 67\n",
       " 38 15 29 52\n",
       " 109 71 412 450\n",
       " 594 230 703 1067\n",
       " 269 134 352 487\n",
       " 216 192 832 856\n",
       " ⋮ \n",
       " 75 25 59 109\n",
       " 345 174 1153 1324\n",
       " 146 101 395 440\n",
       " 21 8 25 38\n",
       " 174 82 586 678\n",
       " 248 173 549 624\n",
       " 238 179 977 1036\n",
       " 476 147 1178 1507\n",
       " 352 146 463 669\n",
       " 122 47 182 257\n",
       " 228 193 1150 1185\n",
       " 529 146 1040 1423"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First three columns of M plus a new fourth column that is an exact linear combination of them\n",
    "# (col1 - col2 + col3), which makes A rank-deficient on purpose.\n",
    "# A is rebuilt from M each time, so re-running this cell cannot append the column twice.\n",
    "A = hcat(M[:, 1:3], M[:, 1] .- M[:, 2] .+ M[:, 3])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-naive-solve",
   "metadata": {},
   "source": [
    "## Naive least-squares solutions\n",
    "\n",
    "Because `A * [1, -1, 1, -1] = 0`, the least-squares problem `min ‖A x − y‖` has infinitely many solutions. The cells below show what happens when this is ignored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "474db68f-18a1-4465-86d2-8858b5e673f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4-element Vector{Float64}:\n",
       " 26031.994273985612\n",
       " 3136.8196381998196\n",
       " 1113.9150221554164\n",
       " 1.2532368455541195e-12"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U, S, V = svd(A)\n",
    "S # singular values: the last one is ~1e-12, i.e. zero up to rounding, because A has rank 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "996fde91-0545-4c13-b698-f98695f4f9e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4-element Vector{Float64}:\n",
       " 0.0002891389119208513\n",
       " 1113.9150221554394\n",
       " 3136.8196381998237\n",
       " 26031.994273985627"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Singular values recovered as square roots of the eigenvalues of A'A (ascending order).\n",
    "# This does not help: forming A'A squares the condition number, so the smallest value is lost\n",
    "# to rounding error and comes out as ~3e-4 instead of ~1e-12.\n",
    "# `max.(…, 0)` guards against a tiny negative eigenvalue, for which `sqrt` would throw a DomainError.\n",
    "sqrt.(max.(eigvals(A' * A), 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "bd0558d1-a325-453e-b11c-33fb8fa08179",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4-element Vector{Float64}:\n",
       " -4.70433529065995e17\n",
       " 4.7043352906598285e17\n",
       " -4.704335290659996e17\n",
       " 4.704335290660089e17"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = M[:, 4]\n",
    "\n",
    "# Naive pseudo-inverse solution x = V Σ⁻¹ Uᵀ y. Dividing by the ~1e-12 singular value blows the\n",
    "# result up to ~4.7e17 times (-1, 1, -1, 1), i.e. a huge multiple of the null-space direction of A.\n",
    "x = V * inv(diagm(S)) * U' * y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8e337ee5-c9d9-4c36-a186-d6f1f249fc99",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4-element Vector{Float64}:\n",
       " -6312.126276909091\n",
       " 1760.8043960629386\n",
       " -17914.38753206213\n",
       " -9.408670581319933e17"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solving the diagonal system instead of inverting it gives the coefficients in the basis of V\n",
    "# (before the multiplication by V). Same problem: the fourth one is still divided by ~1e-12.\n",
    "diagm(S) \\ (U' * y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "174145da-2232-4b8c-a41f-fdbfb6d1d983",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4-element Vector{Float64}:\n",
       " -7381.269979698599\n",
       " -5911.720495010535\n",
       " -11150.23174887978\n",
       " 20666.47182920933"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normal equations. A'A is singular as well, but here the solver returns finite-looking numbers,\n",
    "# so the problem is hidden rather than solved: this is just one of the infinitely many\n",
    "# least-squares solutions, and which one comes out depends on rounding.\n",
    "(A' * A) \\ (A' * y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-truncated-svd",
   "metadata": {},
   "source": [
    "## Truncated SVD\n",
    "\n",
    "Treat every singular value at or below `SV_TOL` as exactly zero and drop its contribution to the pseudo-inverse. The cut-off is a pragmatic choice, not a derived one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "def-truncated-inverse",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "    truncated_inverse(σ; tol)\n",
    "\n",
    "Return a vector holding `1/σ[i]` where `σ[i] > tol` and zero elsewhere.\n",
    "\n",
    "The element type is always `float(eltype(σ))`. A broadcast of `if x > tol 1/x else 0 end`\n",
    "would instead mix `Float64` and `Int` and produce a `Vector{Real}`.\n",
    "\"\"\"\n",
    "function truncated_inverse(σ::AbstractVector{<:Real}; tol::Real)\n",
    "    T = float(eltype(σ))\n",
    "    return T[s > tol ? inv(s) : zero(T) for s in σ]\n",
    "end;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07671bb0-0d01-4a9f-a075-522fbd0f691e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inverse singular values with the (numerically) zero one replaced by 0.\n",
    "Ss = truncated_inverse(S; tol=SV_TOL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "294319d8-71ce-412d-8c09-ff2a032e2b52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Least-squares solution through the truncated pseudo-inverse.\n",
    "x_reg = V * (Diagonal(Ss) * (U' * y))\n",
    "display(x_reg)\n",
    "\n",
    "# The same thing written explicitly: keep only the r leading singular triplets\n",
    "# (svd returns the singular values in descending order).\n",
    "r = count(>(SV_TOL), S)\n",
    "x_reg_explicit = V[:, 1:r] * (Diagonal(S[1:r]) \\ (U[:, 1:r]' * y))\n",
    "display(x_reg_explicit)\n",
    "\n",
    "# Much better behaved than the naive solutions, but the choice of SV_TOL is a bit arbitrary.\n",
    "@assert x_reg ≈ x_reg_explicit"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-noise",
   "metadata": {},
   "source": [
    "## Effect of noise on the smallest singular value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96678996-a2b6-4f96-be9b-b5107015c237",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Perturb A with seeded Gaussian noise. The results are kept under their own names so that\n",
    "# U, S, V from the clean SVD above are not overwritten.\n",
    "rng = Xoshiro(SEED)\n",
    "E = NOISE_SCALE * randn(rng, size(A)...)\n",
    "AA = A .+ E\n",
    "S_noisy = svdvals(AA)\n",
    "\n",
    "# The three large singular values barely move, while the smallest one becomes much larger:\n",
    "# in the original (unseeded) run it rose from ~1e-12 to ~2e-3.\n",
    "S_noisy"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.9.3",
   "language": "julia",
   "name": "julia-1.9"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.9.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}