Мне нужно построить прогноз и доверительные интервалы, а также использовать Python и только следующие пакеты. Как построить оба интервала в одной модели. Мне удалось построить интервалы прогнозирования с помощью
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#dataframe
data = {
'X': [55641, 55681, 55637, 55825, 55772, 55890, 56068, 56299, 56825, 57205,
57562, 57850, 57975, 57992, 58240, 58414, 58561, 59066, 58596, 58631,
58758, 59037],
'Y': [21886, 21934, 21699, 21901, 21812, 21714, 21932, 22086, 22265, 22551,
22736, 22301, 22518, 22580, 22618, 22890, 23112, 23315, 22865, 22788,
22949, 23149]
}
df = pd.DataFrame(data)
#OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
print(results.summary())
#calculating prediction intevals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
#data points
plt.scatter(df['X'], df['Y'], color='black', label='Data')
#regression line
plt.plot(df['X'], results.fittedvalues, color='#58C9F4', label='Regression Line')
#prediction invterval
plt.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression with Prediction Intervals')
plt.legend()
plt.show()
Как мне продолжать строить оба интервала?






summary_frame предоставляет как интервалы прогнозирования (obs_ci_lower, obs_ci_upper), так и доверительные интервалы (mean_ci_lower, mean_ci_upper).fill_between используется дважды для построения обоих интервалов: один раз для интервала прогнозирования и один раз для доверительного интервала.import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Dataframe
data = {
'X': [55641, 55681, 55637, 55825, 55772, 55890, 56068, 56299, 56825, 57205,
57562, 57850, 57975, 57992, 58240, 58414, 58561, 59066, 58596, 58631,
58758, 59037],
'Y': [21886, 21934, 21699, 21901, 21812, 21714, 21932, 22086, 22265, 22551,
22736, 22301, 22518, 22580, 22618, 22890, 23112, 23315, 22865, 22788,
22949, 23149]
}
df = pd.DataFrame(data)
# OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
print(results.summary())
# Calculating prediction intervals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
# Calculating confidence intervals
confidence_intervals = results.conf_int(alpha=0.05)
# Data points
ax = df.plot(kind='scatter', x='X', y='Y', color='black', label='Data')
# Regression line
ax.plot(df['X'], results.fittedvalues, color='#58C9F4', label='Regression Line')
# Prediction interval
ax.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
# Confidence interval
ax.fill_between(df['X'], prediction_summary_frame['mean_ci_lower'], prediction_summary_frame['mean_ci_upper'], color='blue', alpha=0.2, label='95% Confidence Interval')
ax.set(xlabel='X', ylabel='Y', title='Linear Regression with Prediction and Confidence Intervals')
ax.legend()
plt.show()
OLS Regression Results
==============================================================================
Dep. Variable: Y R-squared: 0.919
Model: OLS Adj. R-squared: 0.915
Method: Least Squares F-statistic: 227.5
Date: Tue, 11 Jun 2024 Prob (F-statistic): 2.17e-12
Time: 06:30:58 Log-Likelihood: -140.06
No. Observations: 22 AIC: 284.1
Df Residuals: 20 BIC: 286.3
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 559.4600 1450.698 0.386 0.704 -2466.642 3585.562
X 0.3815 0.025 15.084 0.000 0.329 0.434
==============================================================================
Omnibus: 0.314 Durbin-Watson: 1.479
Prob(Omnibus): 0.855 Jarque-Bera (JB): 0.390
Skew: -0.242 Prob(JB): 0.823
Kurtosis: 2.562 Cond. No. 2.64e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.64e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
В исходном коде перехват x = 0 правильный, но для его визуального представления необходимо расширить пределы оси X. Кроме того, наклон кажется искаженным из-за неравного соотношения сторон между осями X и Y, что приводит к искажению линейной зависимости.
# OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
# Extract coefficients
intercept = results.params['Intercept']
slope = results.params['X']
# Calculate regression line values
x_values = np.linspace(0, 59037, 100)
y_values = intercept + slope * x_values
# Calculating prediction intervals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
# Calculating confidence intervals
confidence_intervals = results.conf_int(alpha=0.05)
# Data points
ax = df.plot(kind='scatter', x='X', y='Y', color='black', label='Data', s=1, figsize=(12, 10))
# Regression line
ax.plot(x_values, y_values, color='#58C9F4', label='Regression Line')
# Adding the point where the regression line intersects the y-axis
ax.scatter(0, intercept, color='red', label='Y-intercept (x=0)', s=2)
ax.annotate(f'(0, {intercept:.2f})', xy=(0, intercept), xytext=(2000, intercept + 1000), arrowprops=dict(facecolor='black', shrink=0.05))
# Annotating the regression line with the slope
ax.text(x_values[len(x_values)//2], y_values[len(y_values)//2], f'Slope: {slope:.2f}', fontsize=12, verticalalignment='bottom')
# Prediction interval
ax.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
# Confidence interval
ax.fill_between(df['X'], prediction_summary_frame['mean_ci_lower'], prediction_summary_frame['mean_ci_upper'], color='blue', alpha=0.2, label='95% Confidence Interval')
# Setting equal aspect ratio
ax.set_aspect('equal', 'box')
ax.set(xlabel='X', ylabel='Y', title='Linear Regression with Prediction and Confidence Intervals')
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left', frameon=False)
plt.show()