Screen Link:
https://app.dataquest.io/m/65/guided-project%3A-predicting-the-stock-market/6/improving-error
My Code:
import pandas as pd
import numpy as np
from datetime import datetime
data = pd.read_csv("sphist.csv")
data["Date"] = pd.to_datetime(data["Date"])
data = data.sort_values("Date",ascending=True)
for index, values in data.iterrows():
if index>365:
values["day_5"] = np.mean([data["Close"].iloc[i] for i in range(index-5,index)])
values["day_30"] = np.mean([data["Close"].iloc[i] for i in range(index-30,index)])
values["day_365"] = np.mean([data["Close"].iloc[i] for i in range(index-365,index)])
values["means_ratio"] = values["day_5"] / values["day_365"]
values["day_5_std"]= np.std([data["Close"].iloc[i] for i in range(index-5,index)])
values["day_365_std"]= np.std([data["Close"].iloc[i] for i in range(index-365,index)])
values["std_ratio"] = values["day_5_std"] / values["day_365_std"]
data = data[data["Date"]>datetime(year=1951,month=1,day=5)]
data = data.dropna(axis=0)
train = data[data["Date"]<datetime(year=2013,month=1,day=2)]
test = data[data["Date"]>datetime(year=2013,month=1,day=1)]
from scikit.metrics import mean_absolute_error
from scikit.linear_models import LinearRegression
model = LinearRegression()
train_features = train.drop(["Close","High","Low","Open","Volume","Adj Close","Date"])
train_target = train["Close"]
model.fit(train_features,train_target)
test_features = test.drop(["Close","High","Low","Open","Volume","Adj Close","Date"])
predictions = model.predict(test_features)
mean_absolute_error = mean_absolute_error(test["Close"],predictions)
print(mean_absolute_error)
No error. messages but my code runs very slow when creating the new features
anything I can do to make it faster
Altough the last part has not been checked for errors as the first part runs so slow