diff --git a/.gitignore b/.gitignore
index 397b4a7624e35fa60563a9c03b1213d93f7b6546..55c5cd2aec45288050e5b034ef7e9609e43a598b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
 *.log
+*.gz
+*.pyobj
+
diff --git a/longterm_baseline.py b/longterm_baseline.py
index 05e59c7a7460e094f727dd34a477d506b489d93b..796a164375c00c501161aa8484a7b0bd37699a49 100644
--- a/longterm_baseline.py
+++ b/longterm_baseline.py
@@ -3,7 +3,23 @@
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+# [(Snapshot_Time, [(buy_price1, amount1), ...] , [(sell_price1, amount1), ...]), ...]
+# [(1620457034392, [(56000, 0.01), (55900, 1), (55700, 30), ...] , [(57000, 0.01), (57100, 1), ...] ), (1620457034394, [...]), ...]
+# The snapshots should have an almost identical time-interval. Good for LSTM.
+# Time axis: [history, older, newer, ..., latest]
+realtime_shortterm_dataset_aggtrade = []
+realtime_shortterm_dataset_aggtrade_size = 1024
+# [(Trade_Time, PRICE, AMOUNT), ...]
+# [(1620457034392, 56000, 0.5), (1620457034394, 56001, 0.05), ...]
+# The trades usually have varying time-intervals. TODO: transform them into [(WeightedAvgPrice, Volume), ...] for every 1 minute?
+# Time axis: [history, older, newer, ..., latest]
+realtime_shortterm_dataset_depth = []
+realtime_shortterm_dataset_depth_size = 1024*1024
+
+# The trading thread will not start working before the longterm dataset has been analysed.
+# Time-interval for longterm dataset is 1 minute.
+longterm_dataset = []
diff --git a/sample.py b/sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..d14399389d0badc70458bb7ce53a452235174b48
--- /dev/null
+++ b/sample.py
@@ -0,0 +1,135 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from math import sin
+
+#def prepare_sequence(seq, to_ix):
+#    idxs = [to_ix[w] for w in seq]
+#    return torch.tensor(idxs, dtype=torch.long)
+
+def geni(i):
+    if i % 2 == 0:
+        return sin(i/3) + 0.05#, sin(i) - 0.08]
+    else:
+        return sin(i/3) - 0.1#, sin(i) - 0.1, sin(i)]
+
+input_seq = [geni(i) for i in range(10240)]
+
+EMBEDDING_DIM = 7
+HIDDEN_DIM = 7
+
+class LSTMTagger(nn.Module):
+
+    def __init__(self, input_dim, hidden_dim):
+        super(LSTMTagger, self).__init__()
+        self.hidden_dim = hidden_dim
+
+        # The LSTM takes word embeddings as inputs, and outputs hidden states
+        # with dimensionality hidden_dim.
+        self.lstm = nn.LSTM(1, hidden_dim)
+
+        # The linear layer that maps from hidden state space to tag space
+        self.out = nn.Linear(hidden_dim, 1)
+
+    def forward(self, sample_seq, lstm_state):
+        lstm_out, lstm_state = self.lstm(sample_seq.view(len(sample_seq), 1, 1))
+        score = self.out(torch.tanh(lstm_out[-1:]))
+        return score, lstm_state
+
+
+model = LSTMTagger(128, 128)
+loss_function = nn.NLLLoss()
+optimizer = optim.SGD(model.parameters(), lr=0.1)
+
+# See what the scores are before training
+# Note that element i,j of the output is the score for tag j for word i.
+# Here we don't need to train, so the code is wrapped in torch.no_grad()
+#with torch.no_grad():
+#    inputs = prepare_sequence(training_data[0][0], word_to_ix)
+#    tag_scores = model(inputs)
+#    print(tag_scores)
+
+real_xy, guess_xy = [], []
+
+lstm_state = torch.zeros([1,1,128]), torch.zeros([1,1,128])
+for i in range(len(input_seq)):
+    if i+130 >= len(input_seq):
+        continue
+    inputseq = input_seq[i:i+128]
+    # Step 1. Remember that Pytorch accumulates gradients.
+    # We need to clear them out before each instance
+    model.zero_grad()
+
+    # Step 2. Get our inputs ready for the network, that is, turn them into
+    # Tensors of word indices.
+    sentence_in = torch.tensor(inputseq, dtype=torch.float)
+
+    # Step 3. Run our forward pass.
+    #print("MODEL INPUT: ", sentence_in.shape)
+    scout, lstm_state = model(sentence_in, lstm_state)
+
+    # Step 4. Compute the loss, gradients, and update the parameters by
+    # calling optimizer.step()
+    # loss = loss_function(tag_scores, targets)
+    #loss = loss_function(scout, torch.tensor(input_seq[i+129]))
+    loss = torch.abs(scout - input_seq[i+129])
+    print("DEBUG: LOSS=", loss)
+    loss.backward(retain_graph=True)
+    optimizer.step()
+
+    if i > 10240-1024:
+        real_xy.append(((i+129)/3, input_seq[i+129]))
+        guess_xy.append(((i+129)/3, scout[0,0,0].tolist()))
+
+#print("REAL=", real_xy)
+#print("GUESS=", guess_xy)
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+
+x = np.arange(10)
+ys = [i+x+(i*x)**2 for i in range(10)]
+
+colors = cm.rainbow(np.linspace(0, 1, len(ys)))
+for x, y in real_xy:
+    plt.scatter(x, y, color=colors[1])
+for x, y in guess_xy:
+    plt.scatter(x, y, color=colors[6])
+plt.show()
+
+#for epoch in range(3):  # again, normally you would NOT do 300 epochs, it is toy data
+#    for sentence, tags in training_data:
+#        # Step 1. Remember that Pytorch accumulates gradients.
+#        # We need to clear them out before each instance
+#        model.zero_grad()
+#
+#        # Step 2. Get our inputs ready for the network, that is, turn them into
+#        # Tensors of word indices.
+#        sentence_in = prepare_sequence(sentence, word_to_ix)
+#        targets = prepare_sequence(tags, tag_to_ix)
+#
+#        # Step 3. Run our forward pass.
+#        print("MODEL INPUT: ", sentence_in.shape)
+#        tag_scores = model(sentence_in)
+#
+#        # Step 4. Compute the loss, gradients, and update the parameters by
+#        # calling optimizer.step()
+#        loss = loss_function(tag_scores, targets)
+#        loss.backward()
+#        optimizer.step()
+
+# See what the scores are after training
+#with torch.no_grad():
+#    inputs = prepare_sequence(training_data[0][0], word_to_ix)
+#    tag_scores = model(inputs)
+#
+#    # The sentence is "the dog ate the apple". i,j corresponds to score for tag j
+#    # for word i. The predicted tag is the maximum scoring tag.
+#    # Here, we can see the predicted sequence below is 0 1 2 0 1
+#    # since 0 is index of the maximum value of row 1,
+#    # 1 is the index of maximum value of row 2, etc.
+#    # Which is DET NOUN VERB DET NOUN, the correct sequence!
+#    print(tag_scores)
+
diff --git a/ws.py b/ws.py
index cb7c128b67bb068ce9b9fd0d1cbc77366e27084a..d7f6b6ede1f0291df87120602588893cfaf3257f 100755
--- a/ws.py
+++ b/ws.py
@@ -5,25 +5,38 @@
 import json, time
 from websocket import create_connection
+import signal, sys, pickle
 
 # [(Snapshot_Time, [(buy_price1, amount1), ...] , [(sell_price1, amount1), ...]), ...]
 # [(1620457034392, [(56000, 0.01), (55900, 1), (55700, 30), ...] , [(57000, 0.01), (57100, 1), ...] ), (1620457034394, [...]), ...]
 # The snapshots should have an almost identical time-interval. Good for LSTM.
 # Time axis: [history, older, newer, ..., latest]
 realtime_shortterm_dataset_aggtrade = []
-realtime_shortterm_dataset_aggtrade_size = 1024
+realtime_shortterm_dataset_aggtrade_size = -1 #1024
 
 # [(Trade_Time, PRICE, AMOUNT), ...]
 # [(1620457034392, 56000, 0.5), (1620457034394, 56001, 0.05), ...]
 # The trades usually have varying time-intervals. TODO: transform them into [(WeightedAvgPrice, Volume), ...] for every 1 minute?
 # Time axis: [history, older, newer, ..., latest]
 realtime_shortterm_dataset_depth = []
-realtime_shortterm_dataset_depth_size = 1024*1024
+realtime_shortterm_dataset_depth_size = -1 #1024*1024
 
 # The trading thread will not start working before the longterm dataset has been analysed.
 # Time-interval for longterm dataset is 1 minute.
 longterm_dataset = []
 
 
+def save_realtime_dataset_on_exit():
+    global realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth
+    print("Saving {}+{} elements...".format(len(realtime_shortterm_dataset_aggtrade), len(realtime_shortterm_dataset_depth)))
+    pickle.dump((realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth), open("realtime_dataset_dump.pyobj", "wb"))
+def load_realtime_dataset_on_start():
+    global realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth
+    try:
+        realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_depth = pickle.load(open("realtime_dataset_dump.pyobj", "rb"))
+        print("Loaded {}+{} elements. ".format(len(realtime_shortterm_dataset_aggtrade), len(realtime_shortterm_dataset_depth)))
+    except Exception:
+        print("No data to load. Skip data loading... ")
+
 def dataset_maintain_thread_main():
     global realtime_shortterm_dataset_aggtrade, realtime_shortterm_dataset_aggtrade_size, realtime_shortterm_dataset_depth, realtime_shortterm_dataset_depth_size
 
@@ -47,7 +60,7 @@ def dataset_maintain_thread_main():
                 # Is a trade message
                 trade_time, price, amount = parsed.get('T'), parsed.get('p'), parsed.get('q')
                 realtime_shortterm_dataset_aggtrade.append((trade_time, price, amount))
-                if len(realtime_shortterm_dataset_aggtrade) > realtime_shortterm_dataset_aggtrade_size:
+                if realtime_shortterm_dataset_aggtrade_size != -1 and len(realtime_shortterm_dataset_aggtrade) > realtime_shortterm_dataset_aggtrade_size:
                     del realtime_shortterm_dataset_aggtrade[0]
             elif 'b' in parsed.keys():
                 # Is a depth snapshot update
@@ -55,7 +68,7 @@ def dataset_maintain_thread_main():
                 buy_makes_parsed = [(float(ele[0]), float(ele[1])) for ele in buy_makers]
                 sell_makes_parsed = [(float(ele[0]), float(ele[1])) for ele in sell_makers]
                 realtime_shortterm_dataset_depth.append((snapshot_time, buy_makes_parsed, sell_makes_parsed))
-                if len(realtime_shortterm_dataset_depth) > realtime_shortterm_dataset_depth_size:
+                if realtime_shortterm_dataset_depth_size != -1 and len(realtime_shortterm_dataset_depth) > realtime_shortterm_dataset_depth_size:
                     del realtime_shortterm_dataset_depth[0]
             else:
                 print("ERROR: unexpected message (maybe ping message): ", message)
@@ -71,5 +84,12 @@ def dataset_maintain_thread_main():
         ws.close()
 
 
+def sigint_handler(sig, frame):
+    print('Got SIGINT! Saving realtime dataset... ')
+    save_realtime_dataset_on_exit()
+    sys.exit(0)
+
+signal.signal(signal.SIGINT, sigint_handler)
+load_realtime_dataset_on_start()
 dataset_maintain_thread_main()
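
Side note on the TODO in the dataset comments above: a minimal sketch of how the aggtrade tuples (Trade_Time, PRICE, AMOUNT) could be collapsed into one (WeightedAvgPrice, Volume) pair per minute. This is not part of the patch; the function name is hypothetical, and it assumes Trade_Time is a millisecond epoch timestamp (as the sample values suggest) and that price/amount may still arrive as strings from the websocket feed.

from collections import defaultdict

def aggregate_trades_per_minute(aggtrades):
    # aggtrades: [(trade_time_ms, price, amount), ...], oldest first.
    # One bucket per minute: accumulate price*amount (notional) and amount (volume).
    buckets = defaultdict(lambda: [0.0, 0.0])
    for trade_time, price, amount in aggtrades:
        minute = int(trade_time) // 60000
        buckets[minute][0] += float(price) * float(amount)
        buckets[minute][1] += float(amount)
    # Return [(weighted_avg_price, volume), ...] ordered oldest-to-newest,
    # matching the [history, ..., latest] ordering used by the datasets.
    return [(notional / volume, volume)
            for _, (notional, volume) in sorted(buckets.items())
            if volume > 0]

# Example with the sample values from the comments, plus one hypothetical trade a minute later:
print(aggregate_trades_per_minute([
    (1620457034392, 56000, 0.5),
    (1620457034394, 56001, 0.05),
    (1620457100000, 56100, 1.0),
]))
# -> [(~56000.09, 0.55), (56100.0, 1.0)]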