
[Python] Time Series Data Analysis - Basic Version

우주먼지의하루 2020. 3. 27. 03:43
Time series
In [68]:
#tistory layout tweak (not needed for the analysis)
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:90% !important;}</style>"))

import pandas as pd
pd.set_option('display.max_columns',500) #show all columns without truncation

Furniture sales forecasting

In [1]:
import warnings
import itertools # functions and generators for working with iterable data streams
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm #Python package for statistical modeling and analysis
import matplotlib

#default chart label/tick sizes and text color
matplotlib.rcParams['axes.labelsize'] = 14
matplotlib.rcParams['xtick.labelsize'] = 12
matplotlib.rcParams['ytick.labelsize'] = 12
matplotlib.rcParams['text.color'] = 'k'
In [2]:
pd.set_option('display.max_columns',500) #show all columns without truncation
In [3]:
df = pd.read_excel("C://Users//82106//Desktop//Sample - Superstore.xls")
In [4]:
df.head()
Out[4]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Country City State Postal Code Region Product ID Category Sub-Category Product Name Sales Quantity Discount Profit
0 1 CA-2016-152156 2016-11-08 2016-11-11 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 South FUR-BO-10001798 Furniture Bookcases Bush Somerset Collection Bookcase 261.9600 2 0.00 41.9136
1 2 CA-2016-152156 2016-11-08 2016-11-11 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 South FUR-CH-10000454 Furniture Chairs Hon Deluxe Fabric Upholstered Stacking Chairs,... 731.9400 3 0.00 219.5820
2 3 CA-2016-138688 2016-06-12 2016-06-16 Second Class DV-13045 Darrin Van Huff Corporate United States Los Angeles California 90036 West OFF-LA-10000240 Office Supplies Labels Self-Adhesive Address Labels for Typewriters b... 14.6200 2 0.00 6.8714
3 4 US-2015-108966 2015-10-11 2015-10-18 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale Florida 33311 South FUR-TA-10000577 Furniture Tables Bretford CR4500 Series Slim Rectangular Table 957.5775 5 0.45 -383.0310
4 5 US-2015-108966 2015-10-11 2015-10-18 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale Florida 33311 South OFF-ST-10000760 Office Supplies Storage Eldon Fold 'N Roll Cart System 22.3680 2 0.20 2.5164
In [5]:
#keep only the rows where Category is Furniture
furniture = df.loc[df['Category'] == 'Furniture']
In [6]:
furniture.head()
Out[6]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Country City State Postal Code Region Product ID Category Sub-Category Product Name Sales Quantity Discount Profit
0 1 CA-2016-152156 2016-11-08 2016-11-11 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 South FUR-BO-10001798 Furniture Bookcases Bush Somerset Collection Bookcase 261.9600 2 0.00 41.9136
1 2 CA-2016-152156 2016-11-08 2016-11-11 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 South FUR-CH-10000454 Furniture Chairs Hon Deluxe Fabric Upholstered Stacking Chairs,... 731.9400 3 0.00 219.5820
3 4 US-2015-108966 2015-10-11 2015-10-18 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale Florida 33311 South FUR-TA-10000577 Furniture Tables Bretford CR4500 Series Slim Rectangular Table 957.5775 5 0.45 -383.0310
5 6 CA-2014-115812 2014-06-09 2014-06-14 Standard Class BH-11710 Brosina Hoffman Consumer United States Los Angeles California 90032 West FUR-FU-10001487 Furniture Furnishings Eldon Expressions Wood and Plastic Desk Access... 48.8600 7 0.00 14.1694
10 11 CA-2014-115812 2014-06-09 2014-06-14 Standard Class BH-11710 Brosina Hoffman Consumer United States Los Angeles California 90032 West FUR-TA-10001539 Furniture Tables Chromcraft Rectangular Conference Tables 1706.1840 9 0.20 85.3092
In [7]:
furniture['Order Date'].min(), furniture['Order Date'].max()
Out[7]:
(Timestamp('2014-01-06 00:00:00'), Timestamp('2017-12-30 00:00:00'))

Data preprocessing

In [8]:
#drop the columns we don't need
cols = ['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Quantity', 'Discount', 'Profit']
furniture.drop(cols, axis=1, inplace=True)
In [9]:
furniture = furniture.sort_values('Order Date')
In [10]:
furniture.head()
Out[10]:
Order Date Sales
7474 2014-01-06 2573.820
7660 2014-01-07 76.728
866 2014-01-10 51.940
716 2014-01-11 9.940
2978 2014-01-13 545.940
In [11]:
#check for missing values
furniture.isnull().sum()
Out[11]:
Order Date    0
Sales         0
dtype: int64
In [12]:
furniture = furniture.groupby('Order Date')['Sales'].sum().reset_index() #sum the sales of orders placed on the same day
In [13]:
furniture.head()
Out[13]:
Order Date Sales
0 2014-01-06 2573.820
1 2014-01-07 76.728
2 2014-01-10 51.940
3 2014-01-11 9.940
4 2014-01-13 879.939
In [14]:
#set Order Date as the index
furniture = furniture.set_index('Order Date')
In [15]:
furniture.head()
Out[15]:
Sales
Order Date
2014-01-06 2573.820
2014-01-07 76.728
2014-01-10 51.940
2014-01-11 9.940
2014-01-13 879.939
In [16]:
furniture.index
Out[16]:
DatetimeIndex(['2014-01-06', '2014-01-07', '2014-01-10', '2014-01-11',
               '2014-01-13', '2014-01-14', '2014-01-16', '2014-01-19',
               '2014-01-20', '2014-01-21',
               ...
               '2017-12-18', '2017-12-19', '2017-12-21', '2017-12-22',
               '2017-12-23', '2017-12-24', '2017-12-25', '2017-12-28',
               '2017-12-29', '2017-12-30'],
              dtype='datetime64[ns]', name='Order Date', length=889, freq=None)

resampling

The resample operation re-bins a time series onto a different time interval. When the interval gets smaller the number of data points grows, which is called up-sampling; when the interval gets larger the number of data points shrinks, which is called down-sampling. https://rfriend.tistory.com/494
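
As a quick reference, here is a minimal sketch (not part of the original notebook) showing both directions on a tiny hypothetical monthly series:

#up- vs. down-sampling on a 3-month toy series
import pandas as pd
s = pd.Series([1.0, 2.0, 3.0], index=pd.date_range('2014-01-01', periods=3, freq='MS'))
print(s.resample('D').ffill().head())  #up-sampling: monthly -> daily (more rows, values forward-filled)
print(s.resample('QS').mean())         #down-sampling: monthly -> quarterly mean (fewer rows)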

In [17]:
#'MS' labels each monthly bin by the first day of the month ('M' would label it by the last day)
y = furniture['Sales'].resample('MS').mean()
In [18]:
#sales in 2017
y['2017':]
Out[18]:
Order Date
2017-01-01     397.602133
2017-02-01     528.179800
2017-03-01     544.672240
2017-04-01     453.297905
2017-05-01     678.302328
2017-06-01     826.460291
2017-07-01     562.524857
2017-08-01     857.881889
2017-09-01    1209.508583
2017-10-01     875.362728
2017-11-01    1277.817759
2017-12-01    1256.298672
Freq: MS, Name: Sales, dtype: float64

Visualizing Furniture Sales Time Series Data

In [19]:
y.plot(figsize = (15,6))
plt.show()

We can confirm a seasonal pattern: sales are always low at the beginning of the year and high at the end of the year.

In [20]:
#set the default figure size
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
In [21]:
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()

Here the series is decomposed with an additive model, which treats the observed values as the sum of trend, seasonal, and residual components. A multiplicative model multiplies the components together instead, and is generally more flexible (more general) than the additive one.
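
For comparison, the same series can be decomposed multiplicatively with one change of argument (a sketch, not run in the original notebook):

#observed = trend * seasonal * resid instead of a sum; valid here because all monthly sales are positive
decomposition_mul = sm.tsa.seasonal_decompose(y, model='multiplicative')
fig = decomposition_mul.plot()
plt.show()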

Time series forecasting with ARIMA

We will apply one of the most widely used methods for time-series forecasting, known as ARIMA (AutoRegressive Integrated Moving Average). ARIMA models are written with the notation ARIMA(p, d, q), where the three parameters account for the autoregressive (p), trend/differencing (d), and noise/moving-average (q) structure of the data. For example, ARIMA(1, 1, 1) differences the series once and models it with one autoregressive and one moving-average term.

In [22]:
p = d = q = range(0, 2) #each of p, d, q takes the values 0 and 1
pdq = list(itertools.product(p, d, q)) #all (p, d, q) combinations
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))] #seasonal combinations with period 12 (monthly data)

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))
Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)

Next comes parameter selection for the ARIMA model. The goal here is to use a "grid search" to find the set of parameters that yields the best model performance.

In [23]:
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
        except:
            continue
ARIMA(0, 0, 0)x(0, 0, 0, 12)12 - AIC:769.0817523205916
ARIMA(0, 0, 0)x(0, 0, 1, 12)12 - AIC:1446.5593245884702
ARIMA(0, 0, 0)x(0, 1, 0, 12)12 - AIC:477.7170130920218
ARIMA(0, 0, 0)x(1, 0, 0, 12)12 - AIC:497.23144334183365
C:\Users\82106\Anaconda3\lib\site-packages\statsmodels\base\model.py:512: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
ARIMA(0, 0, 0)x(1, 0, 1, 12)12 - AIC:1172.2086741447833
ARIMA(0, 0, 0)x(1, 1, 0, 12)12 - AIC:318.0047199116341
ARIMA(0, 0, 1)x(0, 0, 0, 12)12 - AIC:720.92522707581
ARIMA(0, 0, 1)x(0, 0, 1, 12)12 - AIC:2900.357535652858
ARIMA(0, 0, 1)x(0, 1, 0, 12)12 - AIC:466.56074298091255
ARIMA(0, 0, 1)x(1, 0, 0, 12)12 - AIC:499.5929815918467
ARIMA(0, 0, 1)x(1, 0, 1, 12)12 - AIC:2513.139467221268
ARIMA(0, 0, 1)x(1, 1, 0, 12)12 - AIC:319.9884876946867
ARIMA(0, 1, 0)x(0, 0, 0, 12)12 - AIC:677.894766843944
ARIMA(0, 1, 0)x(0, 0, 1, 12)12 - AIC:1250.256448915547
ARIMA(0, 1, 0)x(0, 1, 0, 12)12 - AIC:486.6378567198382
ARIMA(0, 1, 0)x(1, 0, 0, 12)12 - AIC:497.78896630044073
ARIMA(0, 1, 0)x(1, 0, 1, 12)12 - AIC:1550.2028470024434
ARIMA(0, 1, 0)x(1, 1, 0, 12)12 - AIC:319.7714068109211
ARIMA(0, 1, 1)x(0, 0, 0, 12)12 - AIC:649.9056176817193
ARIMA(0, 1, 1)x(0, 0, 1, 12)12 - AIC:2626.6969858049224
ARIMA(0, 1, 1)x(0, 1, 0, 12)12 - AIC:458.8705548482836
ARIMA(0, 1, 1)x(1, 0, 0, 12)12 - AIC:486.1832977442613
ARIMA(0, 1, 1)x(1, 0, 1, 12)12 - AIC:2500.937327525841
ARIMA(0, 1, 1)x(1, 1, 0, 12)12 - AIC:310.75743684172716
ARIMA(1, 0, 0)x(0, 0, 0, 12)12 - AIC:692.1645522067712
ARIMA(1, 0, 0)x(0, 0, 1, 12)12 - AIC:1442.9450066127697
ARIMA(1, 0, 0)x(0, 1, 0, 12)12 - AIC:479.46321478521355
ARIMA(1, 0, 0)x(1, 0, 0, 12)12 - AIC:480.9259367935204
ARIMA(1, 0, 0)x(1, 0, 1, 12)12 - AIC:1265.3041082790207
ARIMA(1, 0, 0)x(1, 1, 0, 12)12 - AIC:304.46646750845906
ARIMA(1, 0, 1)x(0, 0, 0, 12)12 - AIC:665.7794442186656
ARIMA(1, 0, 1)x(0, 0, 1, 12)12 - AIC:82073.66352065685
ARIMA(1, 0, 1)x(0, 1, 0, 12)12 - AIC:468.3685195815077
ARIMA(1, 0, 1)x(1, 0, 0, 12)12 - AIC:482.57633238767863
ARIMA(1, 0, 1)x(1, 0, 1, 12)12 - AIC:nan
ARIMA(1, 0, 1)x(1, 1, 0, 12)12 - AIC:306.0156002130368
ARIMA(1, 1, 0)x(0, 0, 0, 12)12 - AIC:671.2513547541902
ARIMA(1, 1, 0)x(0, 0, 1, 12)12 - AIC:1388.9536686999006
ARIMA(1, 1, 0)x(0, 1, 0, 12)12 - AIC:479.2003422281134
ARIMA(1, 1, 0)x(1, 0, 0, 12)12 - AIC:475.3403658784957
ARIMA(1, 1, 0)x(1, 0, 1, 12)12 - AIC:1322.3326479713248
ARIMA(1, 1, 0)x(1, 1, 0, 12)12 - AIC:300.627090134543
ARIMA(1, 1, 1)x(0, 0, 0, 12)12 - AIC:649.0318019835554
ARIMA(1, 1, 1)x(0, 0, 1, 12)12 - AIC:101786.44160210453
ARIMA(1, 1, 1)x(0, 1, 0, 12)12 - AIC:460.47626876096086
ARIMA(1, 1, 1)x(1, 0, 0, 12)12 - AIC:469.5250354660887
ARIMA(1, 1, 1)x(1, 0, 1, 12)12 - AIC:2563.267567779531
ARIMA(1, 1, 1)x(1, 1, 0, 12)12 - AIC:297.78754395330014

Among the outputs above, SARIMAX(1, 1, 1)x(1, 1, 0, 12) produced the lowest AIC value, 297.78, so we select that combination as optimal.
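
Rather than scanning the log by eye, the same grid search can track the minimum as it runs; a sketch using the same settings as In [23]:

#keep the lowest-AIC combination while looping over the same grid as above
best_aic, best_param, best_seasonal = float('inf'), None, None
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            res = sm.tsa.statespace.SARIMAX(y, order=param, seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False).fit(disp=False)
            if res.aic < best_aic:
                best_aic, best_param, best_seasonal = res.aic, param, param_seasonal
        except:
            continue
print('Best: ARIMA{}x{} - AIC:{}'.format(best_param, best_seasonal, best_aic)) #expect (1, 1, 1)x(1, 1, 0, 12) per the log above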

Fitting the ARIMA model

In [24]:
mod = sm.tsa.statespace.SARIMAX(y,
                                order=(1, 1, 1),
                                seasonal_order=(1, 1, 0, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0146      0.342      0.043      0.966      -0.655       0.684
ma.L1         -1.0000      0.360     -2.781      0.005      -1.705      -0.295
ar.S.L12      -0.0253      0.042     -0.609      0.543      -0.107       0.056
sigma2      2.958e+04   1.22e-05   2.43e+09      0.000    2.96e+04    2.96e+04
==============================================================================
In [25]:
results.plot_diagnostics(figsize=(16, 8))
plt.show()

The model residuals are close to normally distributed.
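
Beyond the visual diagnostics, leftover autocorrelation in the residuals can be tested numerically; a sketch, assuming statsmodels 0.11+ for the return_df argument:

#Ljung-Box test: large p-values suggest no significant autocorrelation remains in the residuals
from statsmodels.stats.diagnostic import acorr_ljungbox
print(acorr_ljungbox(results.resid, lags=[12], return_df=True))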

Validating forecasts

To gauge the accuracy of the forecasts, we compare the predicted sales with the actual sales of the time series, setting the forecasts to start at 2017-01-01 and run to the end of the data.

In [26]:
pred = results.get_prediction(start=pd.to_datetime('2017-01-01'), dynamic=False) #dynamic=False: one-step-ahead forecasts, each using all observations up to that point
pred_ci = pred.conf_int() #confidence intervals of the forecasts
ax = y['2014':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()
In [27]:
y_forecasted = pred.predicted_mean
y_truth = y['2017-01-01':]
mse = ((y_forecasted - y_truth) ** 2).mean()
print('The Mean Squared Error of our forecasts is {}'.format(round(mse, 2)))
The Mean Squared Error of our forecasts is 22993.57

The mean squared error (MSE) measures the average squared difference between the estimated values and the actual values. As a measure of estimator quality it is always non-negative, and the smaller the MSE is, the closer we are to finding the line of best fit.
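
As a sanity check on the definition, a two-point toy example with hypothetical numbers:

#MSE and RMSE by hand: ((2 - 3)**2 + (7 - 5)**2) / 2 = 2.5, sqrt(2.5) ≈ 1.58
truth = np.array([3.0, 5.0])
estimate = np.array([2.0, 7.0])
mse_toy = ((estimate - truth) ** 2).mean()
print(mse_toy, np.sqrt(mse_toy))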

In [28]:
print('The Root Mean Squared Error of our forecasts is {}'.format(round(np.sqrt(mse), 2)))
The Root Mean Squared Error of our forecasts is 151.64

Producing and visualizing forecasts

In [29]:
pred_uc = results.get_forecast(steps=100) #forecast 100 months beyond the end of the data
pred_ci = pred_uc.conf_int() #confidence intervals of the forecasts
ax = y.plot(label='observed', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()

Time Series of Furniture vs. Office Supplies

According to the data, Office Supplies had far more sales than Furniture.

In [30]:
furniture = df.loc[df['Category'] == 'Furniture']
office = df.loc[df['Category'] == 'Office Supplies']
furniture.shape, office.shape
Out[30]:
((2121, 21), (6026, 21))
In [31]:
cols = ['Row ID', 'Order ID', 'Ship Date', 'Ship Mode', 'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State', 'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Product Name', 'Quantity', 'Discount', 'Profit']
furniture.drop(cols, axis=1, inplace=True)
office.drop(cols, axis=1, inplace=True)
In [32]:
furniture = furniture.sort_values('Order Date')
office = office.sort_values('Order Date')
In [33]:
furniture = furniture.groupby('Order Date')['Sales'].sum().reset_index()
office = office.groupby('Order Date')['Sales'].sum().reset_index()
In [34]:
furniture = furniture.set_index('Order Date')
office = office.set_index('Order Date')
In [35]:
y_furniture = furniture['Sales'].resample('MS').mean()
y_office = office['Sales'].resample('MS').mean()
In [36]:
furniture = pd.DataFrame({'Order Date':y_furniture.index, 'Sales':y_furniture.values})
office = pd.DataFrame({'Order Date': y_office.index, 'Sales': y_office.values})
In [37]:
furniture.head()
Out[37]:
Order Date Sales
0 2014-01-01 480.194231
1 2014-02-01 367.931600
2 2014-03-01 857.291529
3 2014-04-01 567.488357
4 2014-05-01 432.049188
In [38]:
office.head()
Out[38]:
Order Date Sales
0 2014-01-01 285.357647
1 2014-02-01 63.042588
2 2014-03-01 391.176318
3 2014-04-01 464.794750
4 2014-05-01 324.346545
In [39]:
store = furniture.merge(office, how='inner', on='Order Date')
In [40]:
store.head()
Out[40]:
Order Date Sales_x Sales_y
0 2014-01-01 480.194231 285.357647
1 2014-02-01 367.931600 63.042588
2 2014-03-01 857.291529 391.176318
3 2014-04-01 567.488357 464.794750
4 2014-05-01 432.049188 324.346545
In [41]:
store.rename(columns={'Sales_x': 'furniture_sales', 'Sales_y': 'office_sales'}, inplace=True)
store.head()
Out[41]:
Order Date furniture_sales office_sales
0 2014-01-01 480.194231 285.357647
1 2014-02-01 367.931600 63.042588
2 2014-03-01 857.291529 391.176318
3 2014-04-01 567.488357 464.794750
4 2014-05-01 432.049188 324.346545
In [42]:
plt.figure(figsize=(20, 8))
plt.plot(store['Order Date'], store['furniture_sales'], 'b-', label = 'furniture')
plt.plot(store['Order Date'], store['office_sales'], 'r-', label = 'office supplies')
plt.xlabel('Date'); plt.ylabel('Sales'); plt.title('Sales of Furniture and Office Supplies')
plt.legend()
Out[42]:
<matplotlib.legend.Legend at 0x2079ee6ed48>

We can observe that furniture and office supplies share a similar seasonal pattern: early in the year is the off-season for both categories, and summer appears to be quiet for office supplies as well. In addition, the average daily sales of furniture are higher than those of office supplies in most months, which is understandable since furniture should cost much more than office supplies. Occasionally, though, the average daily sales of office supplies exceeded those of furniture. Let's find out when office supplies first outsold furniture.

In [43]:
first_date = store.loc[np.min(list(np.where(store['office_sales'] > store['furniture_sales'])[0])), 'Order Date'] #.ix was removed from recent pandas; .loc behaves the same here since the index is the default RangeIndex
print("Office supplies first time produced higher sales than furniture is {}.".format(first_date.date()))
Office supplies first time produced higher sales than furniture is 2014-07-01.
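
An equivalent, more idiomatic lookup (a sketch; idxmax on a boolean Series returns the first index where the condition is True):

#first row where office sales exceed furniture sales
first_date = store.loc[(store['office_sales'] > store['furniture_sales']).idxmax(), 'Order Date']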

Time Series Modeling with Prophet

A time-series forecasting library created by Facebook:

  1. Intuitive parameters let you adjust the model without statistical expertise
  2. In typical cases the defaults alone already perform well
  3. There is no need to worry about how it works internally (a minimal interface sketch follows this list)
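
As promised above, a minimal sketch of the workflow on a hypothetical dataframe already in Prophet's required ds/y format (the real fits follow in In [48]):

#Prophet expects a dataframe with a 'ds' (datestamp) column and a 'y' (value) column
from fbprophet import Prophet
m = Prophet()                                            #the defaults are usually a sensible start
m.fit(monthly_sales)                                     #'monthly_sales' is a hypothetical ds/y dataframe
future = m.make_future_dataframe(periods=12, freq='MS')  #extend the frame 12 months past the data
forecast = m.predict(future)                             #adds yhat, yhat_lower, yhat_upper, trend, ...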
In [44]:
! pip install Prophet #note: the library imported below is published on PyPI as 'fbprophet' at this version; 'pip install Prophet' resolves to a different package
Requirement already satisfied: Prophet in c:\users\82106\anaconda3\lib\site-packages (0.1.1)
Requirement already satisfied: pandas>=0.15.1 in c:\users\82106\anaconda3\lib\site-packages (from Prophet) (0.25.1)
Requirement already satisfied: pytz>=2014.9 in c:\users\82106\anaconda3\lib\site-packages (from Prophet) (2019.3)
Requirement already satisfied: six>=1.8.0 in c:\users\82106\anaconda3\lib\site-packages (from Prophet) (1.12.0)
Requirement already satisfied: numpy>=1.13.3 in c:\users\82106\anaconda3\lib\site-packages (from pandas>=0.15.1->Prophet) (1.16.5)
Requirement already satisfied: python-dateutil>=2.6.1 in c:\users\82106\anaconda3\lib\site-packages (from pandas>=0.15.1->Prophet) (2.8.0)
In [45]:
! pip install pystan
Requirement already satisfied: pystan in c:\users\82106\anaconda3\lib\site-packages (2.19.1.1)
Requirement already satisfied: Cython!=0.25.1,>=0.22 in c:\users\82106\anaconda3\lib\site-packages (from pystan) (0.29.13)
Requirement already satisfied: numpy>=1.7 in c:\users\82106\anaconda3\lib\site-packages (from pystan) (1.16.5)
In [46]:
from fbprophet import Prophet
Importing plotly failed. Interactive plots will not work.
In [47]:
furniture.head()
Out[47]:
Order Date Sales
0 2014-01-01 480.194231
1 2014-02-01 367.931600
2 2014-03-01 857.291529
3 2014-04-01 567.488357
4 2014-05-01 432.049188
In [48]:
furniture = furniture.rename(columns={'Order Date': 'ds', 'Sales': 'y'}) #Prophet requires the columns to be named 'ds' and 'y'
furniture_model = Prophet(interval_width=0.95) #95% uncertainty intervals
furniture_model.fit(furniture)
office = office.rename(columns={'Order Date': 'ds', 'Sales': 'y'})
office_model = Prophet(interval_width=0.95)
office_model.fit(office)
furniture_forecast = furniture_model.make_future_dataframe(periods=36, freq='MS') #forecast 36 months ahead
furniture_forecast = furniture_model.predict(furniture_forecast)
office_forecast = office_model.make_future_dataframe(periods=36, freq='MS')
office_forecast = office_model.predict(office_forecast)
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
In [49]:
plt.figure(figsize=(18, 6)) #Prophet's plot() creates its own figure, which is why an empty "<Figure ...>" appears in the output
furniture_model.plot(furniture_forecast, xlabel = 'Date', ylabel = 'Sales')
plt.title('Furniture Sales');
<Figure size 1296x432 with 0 Axes>
In [50]:
plt.figure(figsize=(18, 6))
office_model.plot(office_forecast, xlabel = 'Date', ylabel = 'Sales')
plt.title('Office Supplies Sales');
<Figure size 1296x432 with 0 Axes>

Comparing the forecasts

In [51]:
furniture_names = ['furniture_%s' % column for column in furniture_forecast.columns]
office_names = ['office_%s' % column for column in office_forecast.columns]
In [52]:
furniture_names
Out[52]:
['furniture_ds',
 'furniture_trend',
 'furniture_yhat_lower',
 'furniture_yhat_upper',
 'furniture_trend_lower',
 'furniture_trend_upper',
 'furniture_additive_terms',
 'furniture_additive_terms_lower',
 'furniture_additive_terms_upper',
 'furniture_yearly',
 'furniture_yearly_lower',
 'furniture_yearly_upper',
 'furniture_multiplicative_terms',
 'furniture_multiplicative_terms_lower',
 'furniture_multiplicative_terms_upper',
 'furniture_yhat']
In [53]:
merge_furniture_forecast = furniture_forecast.copy()
merge_office_forecast = office_forecast.copy()
In [54]:
merge_furniture_forecast.columns = furniture_names
merge_office_forecast.columns = office_names
In [55]:
forecast = pd.merge(merge_furniture_forecast, merge_office_forecast, how = 'inner', left_on = 'furniture_ds', right_on = 'office_ds')
forecast = forecast.rename(columns={'furniture_ds': 'Date'}).drop('office_ds', axis=1)
forecast.head()
Out[55]:
Date furniture_trend furniture_yhat_lower furniture_yhat_upper furniture_trend_lower furniture_trend_upper furniture_additive_terms furniture_additive_terms_lower furniture_additive_terms_upper furniture_yearly furniture_yearly_lower furniture_yearly_upper furniture_multiplicative_terms furniture_multiplicative_terms_lower furniture_multiplicative_terms_upper furniture_yhat office_trend office_yhat_lower office_yhat_upper office_trend_lower office_trend_upper office_additive_terms office_additive_terms_lower office_additive_terms_upper office_yearly office_yearly_lower office_yearly_upper office_multiplicative_terms office_multiplicative_terms_lower office_multiplicative_terms_upper office_yhat
0 2014-01-01 726.057713 286.506053 788.293852 726.057713 726.057713 -190.685662 -190.685662 -190.685662 -190.685662 -190.685662 -190.685662 0.0 0.0 0.0 535.372051 487.530759 46.051635 644.705253 487.530759 487.530759 -140.040481 -140.040481 -140.040481 -140.040481 -140.040481 -140.040481 0.0 0.0 0.0 347.490278
1 2014-02-01 727.494023 219.574087 688.314723 727.494023 727.494023 -276.377703 -276.377703 -276.377703 -276.377703 -276.377703 -276.377703 0.0 0.0 0.0 451.116320 494.918445 -199.376479 428.747939 494.918445 494.918445 -385.678283 -385.678283 -385.678283 -385.678283 -385.678283 -385.678283 0.0 0.0 0.0 109.240162
2 2014-03-01 728.791335 470.289440 934.523571 728.791335 728.791335 -22.389755 -22.389755 -22.389755 -22.389755 -22.389755 -22.389755 0.0 0.0 0.0 706.401580 501.591193 172.381810 770.923112 501.591193 501.591193 -31.379844 -31.379844 -31.379844 -31.379844 -31.379844 -31.379844 0.0 0.0 0.0 470.211349
3 2014-04-01 730.227645 385.387745 875.825580 730.227645 730.227645 -100.141158 -100.141158 -100.141158 -100.141158 -100.141158 -100.141158 0.0 0.0 0.0 630.086487 508.978878 93.464909 686.504737 508.978878 508.978878 -134.291690 -134.291690 -134.291690 -134.291690 -134.291690 -134.291690 0.0 0.0 0.0 374.687188
4 2014-05-01 731.617622 334.075179 812.655696 731.617622 731.617622 -160.815662 -160.815662 -160.815662 -160.815662 -160.815662 -160.815662 0.0 0.0 0.0 570.801960 516.128251 -61.843508 555.318794 516.128251 516.128251 -263.821569 -263.821569 -263.821569 -263.821569 -263.821569 -263.821569 0.0 0.0 0.0 252.306682
In [56]:
plt.figure(figsize=(10, 7))
plt.plot(forecast['Date'], forecast['furniture_trend'], 'b-', label = "furniture")
plt.plot(forecast['Date'], forecast['office_trend'], 'r-', label = "office")
plt.legend(loc = 'upper right'); plt.xlabel('Date'); plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Sales Trend');
In [57]:
plt.figure(figsize=(10, 7))
plt.plot(forecast['Date'], forecast['furniture_yhat'], 'b-',label = "furniture")
plt.plot(forecast['Date'], forecast['office_yhat'], 'r-',label = "office")
plt.legend(loc = "upper right"); plt.xlabel('Date'); plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Estimate');
In [58]:
#furniture
furniture_model.plot_components(furniture_forecast);
In [59]:
#office
office_model.plot_components(office_forecast);

Although the growth of office supplies looks slightly stronger, it is good to see that sales of both furniture and office supplies have grown roughly linearly over time and are forecast to keep growing. The worst month for furniture is April, while the worst month for office supplies is February; the best month for furniture is December, and the best month for office supplies is October.
