[{"data":1,"prerenderedAt":509},["ShallowReactive",2],{"content-query-fAACzJc238":3},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"heading":10,"prompt":11,"tags":15,"files":17,"nav":17,"presets":18,"gallery":30,"body":33,"_type":502,"_id":503,"_source":504,"_file":505,"_stem":506,"_extension":507,"sitemap":508},"/tools/multiple-regression","tools",false,"","Multiple Regression Calculator for Excel & CSV","Run multiple regression online from Excel or CSV data. Model one outcome with multiple predictors, interpret coefficients, and compare fit with AI.","Multiple Regression",{"prefix":12,"label":13,"placeholder":14},"Run multiple regression","Describe the columns for multiple regression you want to run","e.g. multiple regression to predict column A using columns B, C, D",[16],"statistics",true,[19,25],{"label":20,"prompt":21,"dataset_url":22,"dataset_title":23,"dataset_citation":24},"Happiness: GDP + population","multiple regression predicting happiness score from GDP per capita and population as predictors, for the most recent year available","https://ourworldindata.org/grapher/happiness-vs-gdp-per-capita-time.csv","Self-reported happiness vs. GDP per capita","Our World in Data",{"label":26,"prompt":27,"dataset_url":28,"dataset_title":29,"dataset_citation":24},"Life expectancy + health spending","multiple regression predicting healthy life expectancy from health spending per capita and year","https://ourworldindata.org/grapher/healthy-life-expectancy-vs-health-expenditure-per-capita.csv","Healthy life expectancy vs. health spending per capita",[31,32],"/img/tools/multiple-regression.png","/img/tools/multiple-regression-coefs.png",{"type":34,"children":35,"toc":493},"root",[36,45,65,70,90,104,110,149,169,175,283,289,380,386,439,445,455,465,475],{"type":37,"tag":38,"props":39,"children":41},"element","h2",{"id":40},"what-is-multiple-regression",[42],{"type":43,"value":44},"text","What Is Multiple Regression?",{"type":37,"tag":46,"props":47,"children":48},"p",{},[49,51,57,59],{"type":43,"value":50},"Multiple regression extends simple linear regression to model the effect of ",{"type":37,"tag":52,"props":53,"children":54},"strong",{},[55],{"type":43,"value":56},"two or more predictor variables",{"type":43,"value":58}," on a continuous outcome simultaneously. It answers: ",{"type":37,"tag":60,"props":61,"children":62},"em",{},[63],{"type":43,"value":64},"controlling for all other predictors, how much does each one individually affect the outcome?",{"type":37,"tag":46,"props":66,"children":67},{},[68],{"type":43,"value":69},"Use multiple regression when:",{"type":37,"tag":71,"props":72,"children":73},"ul",{},[74,80,85],{"type":37,"tag":75,"props":76,"children":77},"li",{},[78],{"type":43,"value":79},"You have several variables that may each influence an outcome",{"type":37,"tag":75,"props":81,"children":82},{},[83],{"type":43,"value":84},"You want to control for confounders (e.g. holding age constant while measuring the effect of income)",{"type":37,"tag":75,"props":86,"children":87},{},[88],{"type":43,"value":89},"You need more accurate predictions than a single predictor provides",{"type":37,"tag":46,"props":91,"children":92},{},[93,95,102],{"type":43,"value":94},"For a single predictor, use ",{"type":37,"tag":96,"props":97,"children":99},"a",{"href":98},"/tools/linear-regression",[100],{"type":43,"value":101},"Linear Regression",{"type":43,"value":103},".",{"type":37,"tag":38,"props":105,"children":107},{"id":106},"how-it-works",[108],{"type":43,"value":109},"How It Works",{"type":37,"tag":111,"props":112,"children":113},"ol",{},[114,124,139],{"type":37,"tag":75,"props":115,"children":116},{},[117,122],{"type":37,"tag":52,"props":118,"children":119},{},[120],{"type":43,"value":121},"Upload your data",{"type":43,"value":123}," — CSV or Excel with predictor columns and an outcome column",{"type":37,"tag":75,"props":125,"children":126},{},[127,132,134],{"type":37,"tag":52,"props":128,"children":129},{},[130],{"type":43,"value":131},"Describe the model",{"type":43,"value":133}," — e.g. ",{"type":37,"tag":60,"props":135,"children":136},{},[137],{"type":43,"value":138},"\"predict house_price using square_footage, bedrooms, and location_score\"",{"type":37,"tag":75,"props":140,"children":141},{},[142,147],{"type":37,"tag":52,"props":143,"children":144},{},[145],{"type":43,"value":146},"Get full results",{"type":43,"value":148}," — coefficients, p-values, R², adjusted R², and diagnostic plots",{"type":37,"tag":46,"props":150,"children":151},{},[152,154,160,162,168],{"type":43,"value":153},"The AI generates Python code using ",{"type":37,"tag":96,"props":155,"children":157},{"href":156},"https://www.statsmodels.org/",[158],{"type":43,"value":159},"statsmodels",{"type":43,"value":161}," or ",{"type":37,"tag":96,"props":163,"children":165},{"href":164},"https://scikit-learn.org/",[166],{"type":43,"value":167},"scikit-learn",{"type":43,"value":103},{"type":37,"tag":38,"props":170,"children":172},{"id":171},"interpreting-the-results",[173],{"type":43,"value":174},"Interpreting the Results",{"type":37,"tag":176,"props":177,"children":178},"table",{},[179,198],{"type":37,"tag":180,"props":181,"children":182},"thead",{},[183],{"type":37,"tag":184,"props":185,"children":186},"tr",{},[187,193],{"type":37,"tag":188,"props":189,"children":190},"th",{},[191],{"type":43,"value":192},"Output",{"type":37,"tag":188,"props":194,"children":195},{},[196],{"type":43,"value":197},"What it means",{"type":37,"tag":199,"props":200,"children":201},"tbody",{},[202,219,235,251,267],{"type":37,"tag":184,"props":203,"children":204},{},[205,214],{"type":37,"tag":206,"props":207,"children":208},"td",{},[209],{"type":37,"tag":52,"props":210,"children":211},{},[212],{"type":43,"value":213},"Coefficient",{"type":37,"tag":206,"props":215,"children":216},{},[217],{"type":43,"value":218},"Change in outcome for a one-unit increase in that predictor, holding all others constant",{"type":37,"tag":184,"props":220,"children":221},{},[222,230],{"type":37,"tag":206,"props":223,"children":224},{},[225],{"type":37,"tag":52,"props":226,"children":227},{},[228],{"type":43,"value":229},"p-value",{"type":37,"tag":206,"props":231,"children":232},{},[233],{"type":43,"value":234},"Whether that predictor's effect is statistically significant",{"type":37,"tag":184,"props":236,"children":237},{},[238,246],{"type":37,"tag":206,"props":239,"children":240},{},[241],{"type":37,"tag":52,"props":242,"children":243},{},[244],{"type":43,"value":245},"R² (R-squared)",{"type":37,"tag":206,"props":247,"children":248},{},[249],{"type":43,"value":250},"Proportion of outcome variance explained by all predictors combined",{"type":37,"tag":184,"props":252,"children":253},{},[254,262],{"type":37,"tag":206,"props":255,"children":256},{},[257],{"type":37,"tag":52,"props":258,"children":259},{},[260],{"type":43,"value":261},"Adjusted R²",{"type":37,"tag":206,"props":263,"children":264},{},[265],{"type":43,"value":266},"R² penalized for the number of predictors — use this to compare models with different numbers of variables",{"type":37,"tag":184,"props":268,"children":269},{},[270,278],{"type":37,"tag":206,"props":271,"children":272},{},[273],{"type":37,"tag":52,"props":274,"children":275},{},[276],{"type":43,"value":277},"VIF",{"type":37,"tag":206,"props":279,"children":280},{},[281],{"type":43,"value":282},"Variance Inflation Factor — flags multicollinearity between predictors (VIF > 5–10 is a concern)",{"type":37,"tag":38,"props":284,"children":286},{"id":285},"example-prompts",[287],{"type":43,"value":288},"Example Prompts",{"type":37,"tag":176,"props":290,"children":291},{},[292,308],{"type":37,"tag":180,"props":293,"children":294},{},[295],{"type":37,"tag":184,"props":296,"children":297},{},[298,303],{"type":37,"tag":188,"props":299,"children":300},{},[301],{"type":43,"value":302},"Scenario",{"type":37,"tag":188,"props":304,"children":305},{},[306],{"type":43,"value":307},"What to type",{"type":37,"tag":199,"props":309,"children":310},{},[311,329,346,363],{"type":37,"tag":184,"props":312,"children":313},{},[314,319],{"type":37,"tag":206,"props":315,"children":316},{},[317],{"type":43,"value":318},"Real estate pricing",{"type":37,"tag":206,"props":320,"children":321},{},[322],{"type":37,"tag":323,"props":324,"children":326},"code",{"className":325},[],[327],{"type":43,"value":328},"multiple regression: predict price using size, bedrooms, bathrooms, and age",{"type":37,"tag":184,"props":330,"children":331},{},[332,337],{"type":37,"tag":206,"props":333,"children":334},{},[335],{"type":43,"value":336},"Employee performance",{"type":37,"tag":206,"props":338,"children":339},{},[340],{"type":37,"tag":323,"props":341,"children":343},{"className":342},[],[344],{"type":43,"value":345},"regression of performance_score on experience, training_hours, and manager_rating",{"type":37,"tag":184,"props":347,"children":348},{},[349,354],{"type":37,"tag":206,"props":350,"children":351},{},[352],{"type":43,"value":353},"Marketing attribution",{"type":37,"tag":206,"props":355,"children":356},{},[357],{"type":37,"tag":323,"props":358,"children":360},{"className":359},[],[361],{"type":43,"value":362},"predict conversions using email_opens, ad_clicks, and page_views",{"type":37,"tag":184,"props":364,"children":365},{},[366,371],{"type":37,"tag":206,"props":367,"children":368},{},[369],{"type":43,"value":370},"Health outcomes",{"type":37,"tag":206,"props":372,"children":373},{},[374],{"type":37,"tag":323,"props":375,"children":377},{"className":376},[],[378],{"type":43,"value":379},"multiple regression of blood_pressure on age, bmi, and exercise_frequency",{"type":37,"tag":38,"props":381,"children":383},{"id":382},"assumptions-to-check",[384],{"type":43,"value":385},"Assumptions to Check",{"type":37,"tag":71,"props":387,"children":388},{},[389,399,409,419,429],{"type":37,"tag":75,"props":390,"children":391},{},[392,397],{"type":37,"tag":52,"props":393,"children":394},{},[395],{"type":43,"value":396},"Linearity",{"type":43,"value":398}," — each predictor has a linear relationship with the outcome",{"type":37,"tag":75,"props":400,"children":401},{},[402,407],{"type":37,"tag":52,"props":403,"children":404},{},[405],{"type":43,"value":406},"No multicollinearity",{"type":43,"value":408}," — predictors should not be highly correlated with each other (check VIF)",{"type":37,"tag":75,"props":410,"children":411},{},[412,417],{"type":37,"tag":52,"props":413,"children":414},{},[415],{"type":43,"value":416},"Homoscedasticity",{"type":43,"value":418}," — residual variance is constant across fitted values",{"type":37,"tag":75,"props":420,"children":421},{},[422,427],{"type":37,"tag":52,"props":423,"children":424},{},[425],{"type":43,"value":426},"Normality of residuals",{"type":43,"value":428}," — residuals follow an approximately normal distribution",{"type":37,"tag":75,"props":430,"children":431},{},[432,437],{"type":37,"tag":52,"props":433,"children":434},{},[435],{"type":43,"value":436},"Independence",{"type":43,"value":438}," — observations are not repeated measures or time series (use specialized models for those)",{"type":37,"tag":38,"props":440,"children":442},{"id":441},"frequently-asked-questions",[443],{"type":43,"value":444},"Frequently Asked Questions",{"type":37,"tag":46,"props":446,"children":447},{},[448,453],{"type":37,"tag":52,"props":449,"children":450},{},[451],{"type":43,"value":452},"How many predictors can I include?",{"type":43,"value":454},"\nAs a rule of thumb, you need at least 10–20 observations per predictor to avoid overfitting. With 200 rows, 10–20 predictors is reasonable.",{"type":37,"tag":46,"props":456,"children":457},{},[458,463],{"type":37,"tag":52,"props":459,"children":460},{},[461],{"type":43,"value":462},"Two of my predictors are highly correlated — is that a problem?",{"type":43,"value":464},"\nYes — this is called multicollinearity, and it inflates standard errors, making coefficients unreliable. The AI will flag high VIF values automatically. You may need to drop one of the correlated variables or combine them.",{"type":37,"tag":46,"props":466,"children":467},{},[468,473],{"type":37,"tag":52,"props":469,"children":470},{},[471],{"type":43,"value":472},"How do I compare two different models?",{"type":43,"value":474},"\nAsk the AI to fit both models and compare their adjusted R² and AIC values. A lower AIC indicates a better fit relative to model complexity.",{"type":37,"tag":46,"props":476,"children":477},{},[478,483,485,491],{"type":37,"tag":52,"props":479,"children":480},{},[481],{"type":43,"value":482},"What if my outcome is binary?",{"type":43,"value":484},"\nUse ",{"type":37,"tag":96,"props":486,"children":488},{"href":487},"/tools/logistic-regression",[489],{"type":43,"value":490},"Logistic Regression",{"type":43,"value":492}," instead.",{"title":7,"searchDepth":494,"depth":494,"links":495},2,[496,497,498,499,500,501],{"id":40,"depth":494,"text":44},{"id":106,"depth":494,"text":109},{"id":171,"depth":494,"text":174},{"id":285,"depth":494,"text":288},{"id":382,"depth":494,"text":385},{"id":441,"depth":494,"text":444},"markdown","content:tools:0008.multiple-regression.md","content","tools/0008.multiple-regression.md","tools/0008.multiple-regression","md",{"loc":4},1775502468196]