Fit head-direction population
Learning objectives
- Learn how to add history-related predictors to NeMoS GLM
- Learn how to reduce over-fitting with Basis
- Learn how to cross-validate with NeMoS + scikit-learn
import matplotlib.pyplot as plt
import numpy as np
import pynapple as nap
import warnings
import workshop_utils
import nemos as nmo
from sklearn.model_selection import GridSearchCV
warnings.filterwarnings("ignore")
# configure pynapple to ignore conversion warning
nap.nap_config.suppress_conversion_warnings = True
# configure plot style
plt.style.use(workshop_utils.STYLE_FILE)
Data Streaming
- Stream the head-direction neurons data
path = workshop_utils.fetch_data("Mouse32-140822.nwb")
Pynapple
- load_file: open the NWB file and give a preview.
data = nap.load_file(path)
data
- Load the units
spikes = data["units"]
spikes
- Load the epochs and take only wakefulness
epochs = data["epochs"]
wake_ep = data["epochs"]["wake"]
- Load the angular head-direction of the animal (in radians)
angle = data["ry"]
- Select only those units that are in ADn
- Restrict the activity to wakefulness (both the spiking activity and the angle)
spikes = spikes.getby_category("location")["adn"]
spikes = spikes.restrict(wake_ep).getby_threshold("rate", 1.0)
angle = angle.restrict(wake_ep)
- Compute tuning curves as a function of head-direction
tuning_curves = nap.compute_1d_tuning_curves(
group=spikes, feature=angle, nb_bins=61, minmax=(0, 2 * np.pi)
)
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].plot(tuning_curves.iloc[:, 0])
ax[0].set_xlabel("Angle (rad)")
ax[0].set_ylabel("Firing rate (Hz)")
ax[1].plot(tuning_curves.iloc[:, 1])
ax[1].set_xlabel("Angle (rad)")
plt.tight_layout()
- Let's visualize the data at the population level.
fig = workshop_utils.plotting.plot_head_direction_tuning(
tuning_curves, spikes, angle, threshold_hz=1, start=8910, end=8960
)
- Take the first 3 minutes of wakefulness to speed up optimization
wake_ep = nap.IntervalSet(
start=wake_ep.start[0], end=wake_ep.start[0] + 3 * 60
)
- Bin the spike trains in 10 ms bins
bin_size = 0.01
count = spikes.count(bin_size, ep=wake_ep)
- sort the neurons by their preferred direction using pandas
pref_ang = tuning_curves.idxmax()
count = nap.TsdFrame(
t=count.t,
d=count.values[:, np.argsort(pref_ang.values)],
)
NeMoS
Self-Connected Single Neuron
- Start with modeling a self-connected single neuron
- Select a neuron
- Select the first 1.2 seconds for visualization
# select a neuron's spike count time series
neuron_count = count[:, 0]
# restrict to a smaller time interval
epoch_one_spk = nap.IntervalSet(
start=count.time_support.start[0], end=count.time_support.start[0] + 1.2
)
- visualize the spike count time course
# set the size of the spike history window in seconds
window_size_sec = 0.8
workshop_utils.plotting.plot_history_window(
neuron_count, epoch_one_spk, window_size_sec
)
- Form a predictor matrix by vertically stacking all the windows (you can use a convolution).
# convert the prediction window to bins (by multiplying with the sampling rate)
window_size = int(window_size_sec * neuron_count.rate)
# convolve the counts with the identity matrix.
input_feature = nmo.convolve.create_convolutional_predictor(
np.eye(window_size), neuron_count
)
# print the NaN indices along the time axis
print("NaN indices:\n", np.where(np.isnan(input_feature[:, 0]))[0])
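- To build intuition (an illustrative aside with a hypothetical toy vector, not part of the original workshop code): the history predictor is equivalent to stacking lagged copies of the count vector, one column per lag, with NaNs where a full history is unavailable; NeMoS's exact padding and lag convention may differ slightly.
# toy illustration: stack lagged copies of a count vector, one column per lag
toy_counts = np.array([0.0, 1.0, 0.0, 2.0, 1.0])
n_lags = 3
toy_history = np.full((toy_counts.size, n_lags), np.nan)
for lag in range(1, n_lags + 1):
    # column `lag - 1` holds the count from `lag` bins in the past
    toy_history[lag:, lag - 1] = toy_counts[:-lag]
print(toy_history)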
- Check the shape of the counts and features.
print(f"Time bins in counts: {neuron_count.shape[0]}")
print(f"Convolution window size in bins: {window_size}")
print(f"Feature shape: {input_feature.shape}")
- Plot the convolution output.
suptitle = "Input feature: Count History"
neuron_id = 0
workshop_utils.plotting.plot_features(input_feature, count.rate, suptitle)
# The resulting feature dimension is 80, because our bin size was 0.01 sec and the window size is 0.8 sec.
# We can learn these weights by maximum likelihood by fitting a GLM.
Fitting the Model
- Split your epochs in two for validation purposes.
# construct the train and test epochs
duration = neuron_count.time_support.tot_length()
start = neuron_count.time_support["start"]
end = neuron_count.time_support["end"]
first_half = nap.IntervalSet(start, start + duration / 2)
second_half = nap.IntervalSet(start + duration / 2, end)
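- Optional sanity check (an illustrative addition, not in the original code): the two halves should tile the recording without overlap.
# the two halves should add up to the full duration
print(first_half.tot_length() + second_half.tot_length(), duration)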
- Fit a GLM to the first half.
# define the GLM object
model = nmo.glm.GLM()
# Fit over the training epochs
model.fit(
input_feature.restrict(first_half),
neuron_count.restrict(first_half)
)
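- Optional aside (not in the original workshop flow): score the first-half fit on the held-out second half with the GLM's score method, the same method GridSearchCV relies on later.
# evaluate the first-half fit on the held-out second half (NeMoS's default score)
test_score = model.score(
    input_feature.restrict(second_half),
    neuron_count.restrict(second_half),
)
print("Held-out score (2nd half):", test_score)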
- Plot the weights.
workshop_utils.plotting.plot_and_compare_weights(
[model.coef_], ["GLM raw history 1st Half"], count.rate)
Inspecting the results
- Fit on the other half.
model_second_half = nmo.glm.GLM()
model_second_half.fit(
input_feature.restrict(second_half),
neuron_count.restrict(second_half)
)
- Compare results.
workshop_utils.plotting.plot_and_compare_weights(
[model.coef_, model_second_half.coef_],
["GLM raw history 1st Half", "GLM raw history 2nd Half"],
count.rate)
Reducing feature dimensionality
- Visualize the raised cosine basis.
workshop_utils.plotting.plot_basis()
# a basis object can be instantiated in "conv" mode for convolving the input.
basis = nmo.basis.RaisedCosineBasisLog(
n_basis_funcs=8, mode="conv", window_size=window_size
)
# construct a time axis for the window: equi-spaced values between 0 and 1,
# scaled by the window duration to express it in seconds
time = window_size_sec * np.arange(window_size) / window_size
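- Optionally (an illustrative addition, not in the original code), preview the kernels on this time axis with evaluate_on_grid, the same method used below to reconstruct the filters.
# preview the raised cosine kernels over the 0.8 s window
_, kernels_preview = basis.evaluate_on_grid(window_size)
plt.figure()
plt.plot(time, kernels_preview)
plt.xlabel("Time from spike (sec)")
plt.ylabel("Basis amplitude")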
- Convolve the counts with the basis functions.
# equivalent to
# `nmo.convolve.create_convolutional_predictor(basis_kernels, neuron_count)`
conv_spk = basis.compute_features(neuron_count)
print(f"Raw count history as feature: {input_feature.shape}")
print(f"Compressed count history as feature: {conv_spk.shape}")
- Visualize the output.
# Visualize the convolution results
epoch_one_spk = nap.IntervalSet(8917.5, 8918.5)
epoch_multi_spk = nap.IntervalSet(8979.2, 8980.2)
workshop_utils.plotting.plot_convolved_counts(
neuron_count, conv_spk, epoch_one_spk, epoch_multi_spk
)
Fit and compare the models
- Fit the model using the compressed features.
model_basis = nmo.glm.GLM()
# restrict to the training interval set
model_basis.fit(
conv_spk.restrict(first_half),
neuron_count.restrict(first_half)
)
print(model_basis.coef_)
- Reconstruct the history filter.
_, basis_kernels = basis.evaluate_on_grid(window_size)
self_connection = np.matmul(basis_kernels, model_basis.coef_)
print(self_connection.shape)
- Fit the other half of the data.
model_basis_second_half = nmo.glm.GLM(
regularizer=nmo.regularizer.UnRegularized("LBFGS")
)
model_basis_second_half.fit(
conv_spk.restrict(second_half), neuron_count.restrict(second_half)
)
# compute responses for the 2nd half fit
self_connection_second_half = np.matmul(basis_kernels, model_basis_second_half.coef_)
- Plot and compare the results.
workshop_utils.plotting.plot_and_compare_weights(
[model.coef_, model_second_half.coef_, self_connection, self_connection_second_half],
["GLM raw history 1st Half", "GLM raw history 2nd Half", "GLM basis 1st Half", "GLM basis 2nd Half"],
count.rate
)
- Predict the rates
rate_basis = model_basis.predict(conv_spk) * conv_spk.rate
rate_history = model.predict(input_feature) * conv_spk.rate
ep = nap.IntervalSet(start=8819.4, end=8821)
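- Multiplying by conv_spk.rate converts the predicted counts per 10 ms bin into spikes per second; a quick illustrative check of that factor (not in the original code):
# the sampling rate of the counts equals 1 / bin_size (100 Hz for 10 ms bins)
print(conv_spk.rate, 1 / bin_size)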
- Plot the results.
# plot the rates
workshop_utils.plotting.plot_rates_and_smoothed_counts(
neuron_count.restrict(ep),
{
    "Self-connection raw history": rate_history,
    "Self-connection basis": rate_basis,
}
)
All-to-all Connectivity
Preparing the features
- Convolve all counts.
- Print the output shape
# convolve all the neurons
convolved_count = basis.compute_features(count)
# shape should be `(n_samples, n_basis_funcs * n_neurons)`
print(f"Convolved count shape: {convolved_count.shape}")
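- Illustrative check (not part of the original code): the feature dimension should equal the number of neurons times the number of basis functions.
# expected feature dimension: n_neurons * n_basis_funcs
print(count.shape[1] * basis.n_basis_funcs, convolved_count.shape[1])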
Fitting the Model
- Fit a PopulationGLM
- Use Ridge regularization with regularizer_strength=0.1
- Print the shape of the estimated coefficients.
model = nmo.glm.PopulationGLM(
regularizer=nmo.regularizer.Ridge("LBFGS", regularizer_strength=0.1)
).fit(convolved_count, count)
print(f"Model coefficients shape: {model.coef_.shape}")
Comparing model predictions
- Predict the firing rate of each neuron
- Convert the rate from spike/bin to spike/sec
# predict the rate (counts are already sorted by tuning prefs)
predicted_firing_rate = model.predict(convolved_count) * conv_spk.rate
- Visualize the predicted rate and tuning function.
# use pynapple time axes for all plotted variables so the imshow tick labels line up
workshop_utils.plotting.plot_head_direction_tuning_model(
tuning_curves, predicted_firing_rate, spikes, angle, threshold_hz=1,
start=8910, end=8960, cmap_label="hsv"
)
- Visually compare all the models.
workshop_utils.plotting.plot_rates_and_smoothed_counts(
neuron_count,
{"Self-connection: raw history": rate_history,
 "Self-connection: basis": rate_basis,
 "All-to-all: basis": predicted_firing_rate[:, 0]}
)
Visualizing the connectivity
- Compute tuning curves from the predicted rates using pynapple.
tuning = nap.compute_1d_tuning_curves_continuous(predicted_firing_rate,
feature=angle,
nb_bins=61,
minmax=(0, 2 * np.pi))
- Extract the weights and store them in an array of shape (num_neurons, num_basis_funcs, num_neurons).
n_neurons = count.shape[1]
weights = model.coef_.reshape(n_neurons, basis.n_basis_funcs, n_neurons)
- Multiply the weights by the basis to get the history filters.
responses = np.einsum("jki,tk->ijt", weights, basis_kernels)
print(responses.shape)
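- The einsum is compact; as an illustrative sanity check (not part of the original code), here is an equivalent explicit-loop version of the same computation.
# equivalent to the einsum above: for each (receiver i, sender j) pair,
# the coupling filter is the basis kernels weighted by the fitted coefficients
weights_np = np.asarray(weights)  # ensure a plain NumPy array for the matmul
responses_loop = np.zeros_like(responses)
for i in range(n_neurons):
    for j in range(n_neurons):
        responses_loop[i, j] = basis_kernels @ weights_np[j, :, i]
print(np.allclose(responses, responses_loop))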
- Plot the connectivity map.
workshop_utils.plotting.plot_coupling(responses, tuning)
K-fold Cross-Validation
K-fold cross-validation (from scikit-learn docs)
K-fold with NeMoS and scikit-learn
- Instantiate the PopulationGLM
- Define a grid of regularization strengths.
- Instantiate and fit the GridSearchCV with 2 folds.
# define the model
model = nmo.glm.PopulationGLM(
regularizer=nmo.regularizer.Ridge("LBFGS")
)
# define a grid of parameters for the search
param_grid = dict(regularizer__regularizer_strength=np.logspace(-3, 0, 4))
print(param_grid)
# define a GridSearch cross-validation from scikit-learn
# with 2-folds
k_fold = GridSearchCV(model, param_grid=param_grid, cv=2)
- Run cross-validation!
# fit the cross-validated model
k_fold.fit(convolved_count, count)
- Print the best parameters.
print(f"Best regularization strength: "
f"{k_fold.best_params_['regularizer__regularizer_strength']}")
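- To see how each candidate strength performed across folds, inspect scikit-learn's standard cv_results_ attribute (an illustrative addition, not part of the original code).
# mean cross-validated score for each regularization strength
for strength, score in zip(
    k_fold.cv_results_["param_regularizer__regularizer_strength"],
    k_fold.cv_results_["mean_test_score"],
):
    print(f"strength={strength}: mean CV score={score:.4f}")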
Exercises
- Plot the weights and rate predictions.
- What happens if you use 5 folds?
- What happens if you cross-validate each neuron individually? Do you select the same hyperparameter for every neuron or not?