updates to initial code:
now the dataset is preprocessed to create 2 features using fractional differencing (FractionalDifferentiator, see https://github.com/microprediction/timeseries-notebooks/blob/main/pytorch_fractional_differencer.ipynb) and both are used as input; also, 3 classes instead of 2 are introduced (none, start, end), and a new combined loss function, "sequential_loss", computes CrossEntropyLoss instead of BCEWithLogitsLoss and penalizes two consecutive identical classes in the output sequence;
optimizer.step() and optimizer.zero_grad() were moved outside of the dataloader batch loop, in order to update the weights once per entire input series instead of once per batch; I suppose this is important so that the bi-LSTM learns its layer weights from the entire series of each separate file.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, RobustScaler
from torch.utils.data import Dataset, DataLoader
def sequential_loss(pred_proba_, pred_class_, labels, loss_fn=None, penalty_weight=0.01):
    """Cross-entropy loss plus a penalty for consecutive identical start/end labels.

    Args:
        pred_proba_: (batch, seq_len, num_classes) tensor from the model's forward
            pass. NOTE(review): forward() applies softmax, so CrossEntropyLoss
            receives probabilities rather than raw logits — confirm this is intended.
        pred_class_: (batch, seq_len) tensor of predicted class indices.
        labels: (batch*seq_len,) tensor of target class indices.
        loss_fn: base classification loss; defaults to the module-level
            ``criterion`` (CrossEntropyLoss) for backward compatibility.
        penalty_weight: cost added per adjacent duplicate of class 1 or 2.

    Returns:
        Scalar tensor: base loss + ordering penalty.
    """
    if loss_fn is None:
        loss_fn = criterion  # module-level CrossEntropyLoss
    # Flatten (batch, seq, C) -> (batch*seq, C). The original hard-coded 3
    # classes and size()[1] (which only works for batch == 1).
    num_classes = pred_proba_.size(-1)
    base_loss = loss_fn(pred_proba_.reshape(-1, num_classes), labels)
    # Adjacent positions predicting the same class, where that class is
    # 1 (interval start) or 2 (interval end) — 'none' (0) repeats are allowed.
    same_class_mask = torch.logical_and(
        torch.eq(pred_class_[:, 1:], pred_class_[:, :-1]),
        torch.logical_or(pred_class_[:, 1:] == 1, pred_class_[:, 1:] == 2),
    )
    order_loss = same_class_mask.float().sum() * penalty_weight
    return base_loss + order_loss
class BiLSTMModel(nn.Module):
    """BiLSTM sequence tagger for 3 classes (0=none, 1=start, 2=end).

    forward() returns per-timestep class probabilities and argmax predictions.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(BiLSTMModel, self).__init__()
        self.bilstm = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        # bidirectional LSTM concatenates forward and backward hidden states
        self.fc = nn.Linear(hidden_size * 2, output_size)

    def forward(self, x):
        """Return (probs, predicted) for input x of shape (batch, seq, input_size)."""
        lstm_output, _ = self.bilstm(x)
        logits = self.fc(lstm_output)
        # NOTE(review): softmax here means the training loss (CrossEntropyLoss)
        # receives probabilities, not logits — CE applies log_softmax internally,
        # so this double-normalizes. Confirm whether forward should return logits.
        probs = torch.softmax(logits, dim=2)  # convert logits to probabilities
        _, predicted = torch.max(probs, dim=2)  # per-timestep class indices
        return probs, predicted

    def predict(self, x):
        """Inference helper: eval mode, no grad.

        Returns:
            (probs, max_result) where max_result is the (values, indices)
            namedtuple from Tensor.max — callers use max_result[1] for classes.
        """
        with torch.no_grad():
            self.eval()
            # BUG FIX: the original re-applied softmax to forward()'s output,
            # which is already softmaxed — that distorted the probabilities.
            y_pred_probs, _ = self(x)
            y_pred_classes = y_pred_probs.max(dim=2)
            return y_pred_probs, y_pred_classes
# Model / training hyper-parameters for the 3-class (none/start/end) tagger.
input_size = 2  # two engineered features: 'frac_scaled' and 'scaled_1_tensor_frac'
hidden_size = 20
output_size = 3  # classes: 0 = none, 1 = interval start, 2 = interval end
model = BiLSTMModel(input_size, hidden_size, output_size).to('cuda')
batch_size = 1024
criterion = nn.CrossEntropyLoss()  # base term consumed by sequential_loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Gradients are accumulated over all batches of one file and applied once per
# file (step/zero_grad live outside the batch loop) — start from clean grads.
optimizer.zero_grad()
# Per-file training loop: each file holds one time series; gradients are
# accumulated over all its batches and a single optimizer step is taken per
# file, so the BiLSTM sees the whole series before each weight update.
for f in tqdm.tqdm([k for k in df_list.keys()][6:-50]):
    # NOTE(review): bare except silently skips unreadable files — consider
    # narrowing to (FileNotFoundError, pd.errors.ParserError) and logging.
    try:
        tr_x = pd.read_csv("/app/onward/SpotTrend/data/"+f,sep="\t", )
    except:
        continue
    # Drop the leftover pandas index column if the CSV was saved with one.
    if 'Unnamed: 0' in tr_x.columns:
        del tr_x['Unnamed: 0']
    else:
        pass
    # Companion file with labeled (start, stop) intervals for this series.
    fy= f[:-4]+'_intervals.csv'
    try:
        tr_y = pd.read_csv("/app/onward/SpotTrend/data/"+fy)
    except:
        continue
    if 'Unnamed: 0' in tr_y.columns:
        del tr_y['Unnamed: 0']
    else:
        pass
    # Convert each labeled (start, stop) row into an index range on tr_x.
    # NOTE(review): assumes 'start'/'stop' are positional row numbers into
    # tr_x — confirm against how the *_intervals.csv files are produced.
    new_intervals = []
    for i in range(0, (len(tr_y))):
        start = tr_y.loc[i]['start']
        stop = tr_y.loc[i]['stop']
        new_start = tr_x.iloc[start]
        new_stop = tr_x.iloc[stop]
        new_intervals.append(tr_x[start.item():stop.item()].index)
    # Per-timestep target: 0 = none, 1 = interval start, 2 = interval end.
    # NOTE(review): idx_range.stop is the exclusive end of a RangeIndex, so
    # '2' is written one row past the labeled stop — verify this is intended.
    tr_x['bin'] = 0
    for idx_range in new_intervals:
        tr_x.loc[idx_range.start, 'bin'] = 1
        tr_x.loc[idx_range.stop, 'bin'] = 2
    order = 0.5  # fractional differencing order d
    # Create a TorchFractionalDifferentiator object (project-local class).
    torch_frac_diff = TorchFractionalDifferentiator(order, memory_threshold=1e-5, max_lags=20)
    print({'num_lags': torch_frac_diff.num_lags})
    # Feature 1: fractionally differentiate the raw series, then standardize.
    original_data_tensor = torch.tensor(tr_x.y, dtype=torch.float32)
    frac_diffed_tensor = torch_frac_diff.transform(original_data_tensor)
    scaler1 = StandardScaler()
    scaled_1 = scaler1.fit_transform(tr_x.y.values.reshape(-1, 1))
    scaler41 = StandardScaler()
    frac_scaled = scaler41.fit_transform(frac_diffed_tensor.reshape(-1, 1)) # scale after frac-diff
    # Feature 2: standardize first, then fractionally differentiate.
    scaled_1_tensor_frac = torch_frac_diff.transform(torch.tensor(scaled_1, dtype=torch.float32)) # frac-diff after scaling
    # The frac-diffed series is shorter than tr_x (burn-in lags); pad the tail
    # with NaN and forward-fill so the feature column matches tr_x's length.
    frac_diffed_series = pd.Series([np.nan] * len(tr_x))
    frac_diffed_series.loc[:len(frac_scaled) - 1] = frac_scaled.ravel()
    print(frac_diffed_series.dtypes)
    frac_diffed_series = frac_diffed_series.ffill()
    tr_x['frac_scaled'] = frac_diffed_series
    frac_diffed_series = pd.Series([np.nan] * len(tr_x))
    frac_diffed_series.loc[:len(scaled_1_tensor_frac) - 1] = scaled_1_tensor_frac.cpu().numpy()
    frac_diffed_series = frac_diffed_series.ffill()
    print(frac_diffed_series.dtypes)
    tr_x['scaled_1_tensor_frac'] = frac_diffed_series
    # Wrap the 2-feature matrix + targets; MyDataset is project-local.
    my_dataset = MyDataset(tr_x[['frac_scaled', 'scaled_1_tensor_frac']].to_numpy(), tr_x['bin'].to_numpy())
    data_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=False)
    for X_batch, y_batch in data_loader:
        # Reshape flat batch to (1, seq_len, 2) — one "sequence" per batch.
        # NOTE(review): model lives on 'cuda' but X/y are never moved there —
        # this likely needs X.to('cuda') / y.to('cuda'); confirm.
        X, y = (X_batch.view(1, int(X_batch.numel()/2) , 2), y_batch)
        preds_proba_, preds_class_ = model(X)
        loss = sequential_loss(preds_proba_, preds_class_, y)
        # Accumulate gradients across all batches of this file (no step here).
        loss.backward()
    # One weight update per file, from gradients over the entire series.
    optimizer.step()
    optimizer.zero_grad()
# Validation: run the trained model over the held-out dataset batch by batch
# and stitch the per-batch predictions back together along the time axis.
all_pred_probs = []
all_pred_classes = []
data_loader_test = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
for xb, yb in data_loader_test:
    # reshape the flat batch into (1, seq_len, 2) to match the model's layout
    X_t, y_t = xb.view(1, int(xb.numel() / 2), 2), yb
    pred_probs, pred_classes = model.predict(X_t)
    # collect this batch's probabilities and class indices ([1] = indices
    # from the (values, indices) result of Tensor.max)
    all_pred_probs.append(pred_probs)
    all_pred_classes.append(pred_classes[1])
# concatenate along the sequence dimension into one full-series prediction
final_pred_probs = torch.cat(all_pred_probs, dim=1)
final_pred_classes = torch.cat(all_pred_classes, dim=1)
#... to be continued
#... to be continued
def interval_loss(predicted_intervals, actual_intervals, avg_length=None, avg_gap=None):
    """Mean absolute deviation of interval lengths and inter-interval gaps
    from reference averages.

    Args:
        predicted_intervals: sequence of (start, stop) numeric pairs, in order.
        actual_intervals: unused; kept for interface compatibility.
        avg_length: reference interval length; defaults to the module-level
            ``avg_interval_length`` for backward compatibility.
        avg_gap: reference gap (next start - previous stop); defaults to the
            module-level ``avg_time_diff``.

    Returns:
        float: length-deviation term + gap-deviation term (0.0 for empty terms).
    """
    if avg_length is None:
        avg_length = avg_interval_length
    if avg_gap is None:
        avg_gap = avg_time_diff
    # np.array so scalar subtraction broadcasts (a plain list - scalar raises).
    lengths = np.array([stop - start for start, stop in predicted_intervals], dtype=float)
    # BUG FIX: the original zip target `stop, next_start` unpacked the two
    # whole interval tuples, not the previous stop / next start values.
    gaps = np.array(
        [nxt[0] - cur[1] for cur, nxt in zip(predicted_intervals[:-1], predicted_intervals[1:])],
        dtype=float,
    )
    # Guard empty arrays: np.mean([]) is NaN with a runtime warning.
    length_term = float(np.mean(np.abs(lengths - avg_length))) if lengths.size else 0.0
    gap_term = float(np.mean(np.abs(gaps - avg_gap))) if gaps.size else 0.0
    return length_term + gap_term
'interval_loss' might be applied as a validation metric (and probably as a loss), but before that we need to find a way to construct intervals from the learned classes 1 (start) and 2 (end) in the fit-predict bi-LSTM output layer.