[{"data":1,"prerenderedAt":510},["ShallowReactive",2],{"content-query-blHdh2uMAA":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"heading":10,"prompt":11,"tags":15,"files":17,"nav":17,"presets":18,"gallery":30,"body":33,"_type":503,"_id":504,"_source":505,"_file":506,"_stem":507,"_extension":508,"sitemap":509},"/tools/linear-regression","tools",false,"","Linear Regression Calculator for Excel & CSV","Run linear regression online from Excel or CSV data. Estimate slopes, confidence intervals, R-squared, and fitted trendlines with AI.","Linear Regression",{"prefix":12,"label":13,"placeholder":14},"Run linear regression","Describe the columns for linear regression you want to run","e.g. linear regression between column A and column B",[16],"statistics",true,[19,25],{"label":20,"prompt":21,"dataset_url":22,"dataset_title":23,"dataset_citation":24},"Happiness vs. GDP","linear regression predicting happiness score from GDP per capita","https://ourworldindata.org/grapher/happiness-vs-gdp-per-capita-time.csv","Self-reported happiness vs. GDP per capita","Our World in Data",{"label":26,"prompt":27,"dataset_url":28,"dataset_title":29,"dataset_citation":24},"Obesity vs. protein supply","linear regression predicting obesity rate from daily protein supply","https://ourworldindata.org/grapher/adult-obesity-vs-protein.csv","Share of adults who are obese vs. daily protein supply",[31,32],"/img/tools/linear-regression.png","/img/tools/iris-scatter-marginal-and-trendlines.jpg",{"type":34,"children":35,"toc":494},"root",[36,45,59,64,84,98,104,144,164,170,278,284,375,381,424,429,435,445,460,476],{"type":37,"tag":38,"props":39,"children":41},"element","h2",{"id":40},"what-is-linear-regression",[42],{"type":43,"value":44},"text","What Is Linear Regression?",{"type":37,"tag":46,"props":47,"children":48},"p",{},[49,51,57],{"type":43,"value":50},"Linear regression models the ",{"type":37,"tag":52,"props":53,"children":54},"strong",{},[55],{"type":43,"value":56},"relationship between one predictor variable and a continuous outcome",{"type":43,"value":58},", fitting a straight line that best describes how the outcome changes as the predictor changes.",{"type":37,"tag":46,"props":60,"children":61},{},[62],{"type":43,"value":63},"Use linear regression when you want to:",{"type":37,"tag":65,"props":66,"children":67},"ul",{},[68,74,79],{"type":37,"tag":69,"props":70,"children":71},"li",{},[72],{"type":43,"value":73},"Quantify how much one variable affects another (e.g. how much does advertising spend increase sales?)",{"type":37,"tag":69,"props":75,"children":76},{},[77],{"type":43,"value":78},"Predict an outcome for new values of the predictor",{"type":37,"tag":69,"props":80,"children":81},{},[82],{"type":43,"value":83},"Test whether a relationship is statistically significant",{"type":37,"tag":46,"props":85,"children":86},{},[87,89,96],{"type":43,"value":88},"For multiple predictor variables, use ",{"type":37,"tag":90,"props":91,"children":93},"a",{"href":92},"/tools/multiple-regression",[94],{"type":43,"value":95},"Multiple Regression",{"type":43,"value":97},".",{"type":37,"tag":38,"props":99,"children":101},{"id":100},"how-it-works",[102],{"type":43,"value":103},"How It Works",{"type":37,"tag":105,"props":106,"children":107},"ol",{},[108,118,134],{"type":37,"tag":69,"props":109,"children":110},{},[111,116],{"type":37,"tag":52,"props":112,"children":113},{},[114],{"type":43,"value":115},"Upload your data",{"type":43,"value":117}," — CSV or Excel with a predictor column and an outcome column",{"type":37,"tag":69,"props":119,"children":120},{},[121,126,128],{"type":37,"tag":52,"props":122,"children":123},{},[124],{"type":43,"value":125},"Describe the relationship",{"type":43,"value":127}," — e.g. ",{"type":37,"tag":129,"props":130,"children":131},"em",{},[132],{"type":43,"value":133},"\"linear regression predicting house_price from square_footage\"",{"type":37,"tag":69,"props":135,"children":136},{},[137,142],{"type":37,"tag":52,"props":138,"children":139},{},[140],{"type":43,"value":141},"Get full results",{"type":43,"value":143}," — regression equation, R², slope, intercept, p-values, and a scatter plot with the fitted line",{"type":37,"tag":46,"props":145,"children":146},{},[147,149,155,157,163],{"type":43,"value":148},"The AI generates Python code using ",{"type":37,"tag":90,"props":150,"children":152},{"href":151},"https://scikit-learn.org/",[153],{"type":43,"value":154},"scikit-learn",{"type":43,"value":156}," and ",{"type":37,"tag":90,"props":158,"children":160},{"href":159},"https://www.statsmodels.org/",[161],{"type":43,"value":162},"statsmodels",{"type":43,"value":97},{"type":37,"tag":38,"props":165,"children":167},{"id":166},"interpreting-the-results",[168],{"type":43,"value":169},"Interpreting the Results",{"type":37,"tag":171,"props":172,"children":173},"table",{},[174,193],{"type":37,"tag":175,"props":176,"children":177},"thead",{},[178],{"type":37,"tag":179,"props":180,"children":181},"tr",{},[182,188],{"type":37,"tag":183,"props":184,"children":185},"th",{},[186],{"type":43,"value":187},"Output",{"type":37,"tag":183,"props":189,"children":190},{},[191],{"type":43,"value":192},"What it means",{"type":37,"tag":194,"props":195,"children":196},"tbody",{},[197,214,230,246,262],{"type":37,"tag":179,"props":198,"children":199},{},[200,209],{"type":37,"tag":201,"props":202,"children":203},"td",{},[204],{"type":37,"tag":52,"props":205,"children":206},{},[207],{"type":43,"value":208},"Slope (coefficient)",{"type":37,"tag":201,"props":210,"children":211},{},[212],{"type":43,"value":213},"Change in the outcome for a one-unit increase in the predictor",{"type":37,"tag":179,"props":215,"children":216},{},[217,225],{"type":37,"tag":201,"props":218,"children":219},{},[220],{"type":37,"tag":52,"props":221,"children":222},{},[223],{"type":43,"value":224},"Intercept",{"type":37,"tag":201,"props":226,"children":227},{},[228],{"type":43,"value":229},"Predicted outcome when the predictor equals zero",{"type":37,"tag":179,"props":231,"children":232},{},[233,241],{"type":37,"tag":201,"props":234,"children":235},{},[236],{"type":37,"tag":52,"props":237,"children":238},{},[239],{"type":43,"value":240},"R² (R-squared)",{"type":37,"tag":201,"props":242,"children":243},{},[244],{"type":43,"value":245},"Proportion of variance in the outcome explained by the model (0–1; higher is better)",{"type":37,"tag":179,"props":247,"children":248},{},[249,257],{"type":37,"tag":201,"props":250,"children":251},{},[252],{"type":37,"tag":52,"props":253,"children":254},{},[255],{"type":43,"value":256},"p-value",{"type":37,"tag":201,"props":258,"children":259},{},[260],{"type":43,"value":261},"Whether the relationship is statistically significant (p \u003C 0.05 is conventional)",{"type":37,"tag":179,"props":263,"children":264},{},[265,273],{"type":37,"tag":201,"props":266,"children":267},{},[268],{"type":37,"tag":52,"props":269,"children":270},{},[271],{"type":43,"value":272},"Confidence interval",{"type":37,"tag":201,"props":274,"children":275},{},[276],{"type":43,"value":277},"Range that likely contains the true slope",{"type":37,"tag":38,"props":279,"children":281},{"id":280},"example-prompts",[282],{"type":43,"value":283},"Example Prompts",{"type":37,"tag":171,"props":285,"children":286},{},[287,303],{"type":37,"tag":175,"props":288,"children":289},{},[290],{"type":37,"tag":179,"props":291,"children":292},{},[293,298],{"type":37,"tag":183,"props":294,"children":295},{},[296],{"type":43,"value":297},"Scenario",{"type":37,"tag":183,"props":299,"children":300},{},[301],{"type":43,"value":302},"What to type",{"type":37,"tag":194,"props":304,"children":305},{},[306,324,341,358],{"type":37,"tag":179,"props":307,"children":308},{},[309,314],{"type":37,"tag":201,"props":310,"children":311},{},[312],{"type":43,"value":313},"Sales prediction",{"type":37,"tag":201,"props":315,"children":316},{},[317],{"type":37,"tag":318,"props":319,"children":321},"code",{"className":320},[],[322],{"type":43,"value":323},"linear regression: predict revenue from advertising_spend",{"type":37,"tag":179,"props":325,"children":326},{},[327,332],{"type":37,"tag":201,"props":328,"children":329},{},[330],{"type":43,"value":331},"Real estate",{"type":37,"tag":201,"props":333,"children":334},{},[335],{"type":37,"tag":318,"props":336,"children":338},{"className":337},[],[339],{"type":43,"value":340},"regression of house_price on square_footage",{"type":37,"tag":179,"props":342,"children":343},{},[344,349],{"type":37,"tag":201,"props":345,"children":346},{},[347],{"type":43,"value":348},"Biology",{"type":37,"tag":201,"props":350,"children":351},{},[352],{"type":37,"tag":318,"props":353,"children":355},{"className":354},[],[356],{"type":43,"value":357},"linear regression between plant height and fertilizer amount",{"type":37,"tag":179,"props":359,"children":360},{},[361,366],{"type":37,"tag":201,"props":362,"children":363},{},[364],{"type":43,"value":365},"Education",{"type":37,"tag":201,"props":367,"children":368},{},[369],{"type":37,"tag":318,"props":370,"children":372},{"className":371},[],[373],{"type":43,"value":374},"predict exam_score from hours_studied",{"type":37,"tag":38,"props":376,"children":378},{"id":377},"assumptions-to-check",[379],{"type":43,"value":380},"Assumptions to Check",{"type":37,"tag":65,"props":382,"children":383},{},[384,394,404,414],{"type":37,"tag":69,"props":385,"children":386},{},[387,392],{"type":37,"tag":52,"props":388,"children":389},{},[390],{"type":43,"value":391},"Linearity",{"type":43,"value":393}," — the relationship between X and Y is approximately linear (check scatter plot)",{"type":37,"tag":69,"props":395,"children":396},{},[397,402],{"type":37,"tag":52,"props":398,"children":399},{},[400],{"type":43,"value":401},"Homoscedasticity",{"type":43,"value":403}," — residuals have constant variance across all fitted values",{"type":37,"tag":69,"props":405,"children":406},{},[407,412],{"type":37,"tag":52,"props":408,"children":409},{},[410],{"type":43,"value":411},"Normality of residuals",{"type":43,"value":413}," — residuals are approximately normally distributed",{"type":37,"tag":69,"props":415,"children":416},{},[417,422],{"type":37,"tag":52,"props":418,"children":419},{},[420],{"type":43,"value":421},"No influential outliers",{"type":43,"value":423}," — a few extreme points can distort the line significantly",{"type":37,"tag":46,"props":425,"children":426},{},[427],{"type":43,"value":428},"Ask the AI to generate residual plots to check these automatically.",{"type":37,"tag":38,"props":430,"children":432},{"id":431},"frequently-asked-questions",[433],{"type":43,"value":434},"Frequently Asked Questions",{"type":37,"tag":46,"props":436,"children":437},{},[438,443],{"type":37,"tag":52,"props":439,"children":440},{},[441],{"type":43,"value":442},"What's the difference between linear regression and correlation?",{"type":43,"value":444},"\nCorrelation measures the strength and direction of a relationship. Linear regression quantifies the exact slope and lets you make predictions. They are related but answer different questions.",{"type":37,"tag":46,"props":446,"children":447},{},[448,453,455,459],{"type":37,"tag":52,"props":449,"children":450},{},[451],{"type":43,"value":452},"My R² is low — does that mean the model is useless?",{"type":43,"value":454},"\nNot necessarily. A low R² means the predictor explains only a small portion of the variance, but the relationship can still be real and statistically significant. Consider adding more predictors using ",{"type":37,"tag":90,"props":456,"children":457},{"href":92},[458],{"type":43,"value":95},{"type":43,"value":97},{"type":37,"tag":46,"props":461,"children":462},{},[463,468,470,475],{"type":37,"tag":52,"props":464,"children":465},{},[466],{"type":43,"value":467},"How do I make predictions for new values?",{"type":43,"value":469},"\nAfter fitting the model, ask the AI: ",{"type":37,"tag":129,"props":471,"children":472},{},[473],{"type":43,"value":474},"\"predict house_price for square_footage = 1500\"",{"type":43,"value":97},{"type":37,"tag":46,"props":477,"children":478},{},[479,484,486,492],{"type":37,"tag":52,"props":480,"children":481},{},[482],{"type":43,"value":483},"What if my outcome variable is binary (yes/no)?",{"type":43,"value":485},"\nLinear regression is not suitable for binary outcomes. Use ",{"type":37,"tag":90,"props":487,"children":489},{"href":488},"/tools/logistic-regression",[490],{"type":43,"value":491},"Logistic Regression",{"type":43,"value":493}," instead.",{"title":7,"searchDepth":495,"depth":495,"links":496},2,[497,498,499,500,501,502],{"id":40,"depth":495,"text":44},{"id":100,"depth":495,"text":103},{"id":166,"depth":495,"text":169},{"id":280,"depth":495,"text":283},{"id":377,"depth":495,"text":380},{"id":431,"depth":495,"text":434},"markdown","content:tools:0007.linear-regression.md","content","tools/0007.linear-regression.md","tools/0007.linear-regression","md",{"loc":4},1775502468196]