Pandas fillna()
Last updated
Was this helpful?
Last updated
Was this helpful?
We can use function from pandas library to fill missing data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
def fill_missing_values(df_data):
df_data.fillna(method='ffill', inplace=True)
df_data.fillna(method='bfill', inplace=True)
def symbol_to_path(symbol, base_dir="data"):
return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
df_final = pd.DataFrame(index=dates)
if "SPY" not in symbols: # add SPY for reference, if absent
symbols.insert(0, "SPY")
for symbol in symbols:
file_path = symbol_to_path(symbol)
df_temp = pd.read_csv(file_path, parse_dates=True, index_col="Date",
usecols=["Date", "Adj Close"], na_values=["nan"])
df_temp = df_temp.rename(columns={"Adj Close": symbol})
df_final = df_final.join(df_temp)
if symbol == "SPY": # drop dates SPY did not trade
df_final = df_final.dropna(subset=["SPY"])
return df_final
def plot_data(df_data):
ax = df_data.plot(title="Stock Data", fontsize=2)
ax.set_xlabel("Date")
ax.set_ylabel("Price")
plt.show()
def test_run():
symbol_list = ["GOOG", "IBM", "XOM"] # list of symbols
start_date = "1940-12-31"
end_date = "2017-12-07"
dates = pd.date_range(start_date, end_date) # date range as index
df_data = get_data(symbol_list, dates) # get data for each symbol
# Fill missing values
fill_missing_values(df_data)
# Plot
plot_data(df_data)
if __name__ == "__main__":
test_run()
Here is the output.