diff --git a/longterm_baseline.py b/longterm_baseline.py
index 6433479f638ca97d48f1947ca80db00e66eb7746..8a4c86104f65cb46d24146a620b1f47fc3f526f3 100644
--- a/longterm_baseline.py
+++ b/longterm_baseline.py
@@ -2,60 +2,98 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+import pickle
 
 # [(Snapshot_Time, [(buy_price1, amount1), ...] , [(sell_price1, amount1), ...]), ...]
 # [(1620457034392, [(56000, 0.01), (55900, 1), (55700, 30), ...] , [(57000, 0.01), (57100, 1), ...] ), (1620457034394, [...]), ...]
 # The snapshots should has almost identical time-interval. Good for LSTM.
 # Time axis: [history, older, newer, ..., latest]
-realtime_shortterm_dataset_aggtrade = []
-realtime_shortterm_dataset_aggtrade_size = 1024
+realtime_shortterm_dataset_depth = []
+realtime_shortterm_dataset_depth_size = 1024
 
 # [(Trade_Time, PRICE, AMOUNT), ...]
 # [(1620457034392, 56000, 0.5), (1620457034394, 56001, 0.05), ...]
 # The trades usually have various time-interval. TODO: transform it to [(WeightedAvgPrice, Volume), ...] for every 1 minutes?
 # Time axis: [history, older, newer, ..., latest]
-realtime_shortterm_dataset_depth = []
-realtime_shortterm_dataset_depth_size = 1024*1024
+realtime_shortterm_dataset_aggtrade = []
+realtime_shortterm_dataset_aggtrade_size = 1024*1024
 
 # The trading thread would not start working, before finish analysing longterm dataset.
 # Time-interval for longterm dataset is 1 minute.
 longterm_dataset = []
 
+def load_realtime_dataset_on_start():
+    # Best-effort restore of the two realtime datasets dumped by ws.py.
+    global realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth
+    try:
+        realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth = pickle.load(open("realtime_dataset_dump.pyobj", "rb"))
+        print("Loaded {}+{} elements. ".format(len(realtime_shortterm_dataset_aggtrade), len(realtime_shortterm_dataset_depth)))
+    except Exception:
+        # Missing/corrupt dump is non-fatal: start with empty datasets.
+        print("No data to load. Skip data loading... ")
+
+
 class LSTM_Shortterm_Predictor(nn.Module):
-    def __init__(self, input_dim, hidden_dim):
-        super(LSTMTagger, self).__init__()
-        self.hidden_dim = hidden_dim
+    def __init__(self, input_dim):
+        super(LSTM_Shortterm_Predictor, self).__init__()
+        self.lstm_idim = 16
+        self.lstm_odim = 128
 
         # The input would be a tuple containing complex information.
         # Firstly, serialize these information into a tuple.
-        self.serializer = nn.Linear(input_dim, lstm_idim)
+        self.serializer = nn.Linear(input_dim, self.lstm_idim)
+        # TODO : remove this useless layer
 
         # The LSTM hidden states
         # with dimensionality hidden_dim.
-        self.lstm = nn.LSTM(lstm_idim, lstm_odim)
+        self.lstm = nn.LSTM(self.lstm_idim, self.lstm_odim)
 
         # The linear layer that maps from hidden state space to tag space
-        self.out = nn.Linear(hidden_odim, 1)
+        self.out = nn.Linear(self.lstm_odim, 1)
 
     def forward(self, sample_seq):
-        input_seq = sample_seq.view(len(sample_seq), 1, 1)
+        # Reshape to (seq_len, batch=1, input_dim); -1 infers the feature
+        # size (the serializer maps input_dim -> lstm_idim afterwards).
+        input_seq = sample_seq.view(len(sample_seq), 1, -1)
         lstm_in = self.serializer(input_seq)
         lstm_out, _ = self.lstm(lstm_in)
         predict_shortterm_trend = self.out(torch.tanh(lstm_out[-1:]))
         return predict_shortterm_trend
 
-def aggtrade_to_impulsive_score_vector(aggtrade):
-    _, buys, sells = aggtrade
+def depth_to_impulsive_score_vector(depth):
+    _, buys, sells = depth
     def get_factors(array_of_pairs):
         values = [pair[0] for pair in array_of_pairs]
         weights = [pair[1] for pair in array_of_pairs]
         average = numpy.average(values, weights=weights)
+        volume = numpy.sum(weights)
         variance = numpy.average((values-average)**2, weights=weights)
         leader_price, leader_weight = array_of_pairs[0]
-        return (average, math.sqrt(variance), leader_price, leader_weight)
+        return (average, volume, math.sqrt(variance), leader_price, leader_weight)
     return get_factors(buys) + get_factors(sells)
 
+model = LSTM_Shortterm_Predictor(10)
+# optimizer = optim.SGD(model.parameters(), lr=0.1)
+optimizer = optim.RMSprop(model.parameters(), lr=0.05, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0.75, centered=False)
+
+def learn_once(depth_seq, trend_answer_score):
+    # returns scalar loss
+    input_seq = [depth_to_impulsive_score_vector(depth) for depth in depth_seq]
+
+    model.zero_grad()
+    i = torch.tensor(input_seq, dtype=torch.float)
+    o = model(i)
+
+    loss = torch.square(o - trend_answer_score)
+    loss.backward(retain_graph=True)
+    optimizer.step()
+
+    # loss is a single-element tensor; item() extracts the Python float.
+    return loss.item()
+
+load_realtime_dataset_on_start()
+for i in range(300):
+    print("DEBUG: l=", realtime_shortterm_dataset_depth[i+256])
+    answer = realtime_shortterm_dataset_depth[i+256][1][0][0] - realtime_shortterm_dataset_depth[i][1][0][0]
+    print("answer=", answer)
+    # learn_once expects a SEQUENCE of depth snapshots, matching the
+    # 256-step horizon used to compute `answer` above.
+    loss = learn_once(realtime_shortterm_dataset_depth[i:i+256], answer)
+    print("Loss=", loss)
diff --git a/ws.py b/ws.py
index d7f6b6ede1f0291df87120602588893cfaf3257f..86f206a20858d4b47bcf00fe8bd14a9cc2301665 100755
--- a/ws.py
+++ b/ws.py
@@ -11,15 +11,15 @@ import signal, sys, pickle
 # [(1620457034392, [(56000, 0.01), (55900, 1), (55700, 30), ...] , [(57000, 0.01), (57100, 1), ...] ), (1620457034394, [...]), ...]
 # The snapshots should has almost identical time-interval. Good for LSTM.
 # Time axis: [history, older, newer, ..., latest]
-realtime_shortterm_dataset_aggtrade = []
-realtime_shortterm_dataset_aggtrade_size = -1 #1024
+realtime_shortterm_dataset_depth = []
+realtime_shortterm_dataset_depth_size = -1#1024*1024
 
 # [(Trade_Time, PRICE, AMOUNT), ...]
 # [(1620457034392, 56000, 0.5), (1620457034394, 56001, 0.05), ...]
 # The trades usually have various time-interval. TODO: transform it to [(WeightedAvgPrice, Volume), ...] for every 1 minutes?
 # Time axis: [history, older, newer, ..., latest]
-realtime_shortterm_dataset_depth = []
-realtime_shortterm_dataset_depth_size = -1#1024*1024
+realtime_shortterm_dataset_aggtrade = []
+realtime_shortterm_dataset_aggtrade_size = -1 #1024
 
 # The trading thread would not start working, before finish analysing longterm dataset.
 # Time-interval for longterm dataset is 1 minute.