TypeError: cannot pickle 'torch._C.Generator' object
It throws an exception when I follow the official tutorial to implement a video classification model:
https://pytorchvideo.org/docs/tutorial_classification
Environment:
platform: macOS-10.16-x86_64-i386-64bit
python version: 3.8.5
torch version: 1.8.1
torchvision version: 0.9.1
pytorch_lightning version: 1.2.8
pytorchvideo version: 0.1.0
fvcore version: 0.1.4.post20210326
The code:

```python
import os

import pytorch_lightning as pl
import pytorchvideo.data
import torch.utils.data
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    RandomShortSideScale,
    RemoveKey,
    ShortSideScale,
    UniformTemporalSubsample,
)
from torchvision.transforms import (
    Compose,
    Normalize,
    RandomCrop,
    RandomHorizontalFlip,
)


class KineticsDataModule(pl.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.transform = Compose(
            [
                ApplyTransformToKey(
                    key="video",
                    transform=Compose(
                        [
                            UniformTemporalSubsample(8),
                            Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                            RandomShortSideScale(min_size=256, max_size=320),
                            RandomCrop(244),
                            RandomHorizontalFlip(p=0.5),
                        ]
                    ),
                ),
            ]
        )

    def train_dataloader(self):
        # VIDEO_PATH is defined elsewhere as the dataset root directory.
        train_dataset = pytorchvideo.data.Kinetics(
            data_path=VIDEO_PATH + "/train",
            clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
            transform=self.transform,
        )
        return torch.utils.data.DataLoader(
            train_dataset,
            batch_size=8,
            num_workers=8,
        )

    def val_dataloader(self):
        val_dataset = pytorchvideo.data.Kinetics(
            data_path=VIDEO_PATH + "/valid",
            clip_sampler=pytorchvideo.data.make_clip_sampler("uniform", 2),
            transform=self.transform,
        )
        return torch.utils.data.DataLoader(
            val_dataset,
            batch_size=8,
            num_workers=8,
        )


import pytorchvideo.models.resnet
import torch
import torch.nn as nn
import torch.nn.functional as F


def make_kinetics_resnet():
    return pytorchvideo.models.resnet.create_resnet(
        input_channel=3,
        model_depth=50,  # 50-layer network
        model_num_class=4,
        norm=nn.BatchNorm3d,
        activation=nn.ReLU,
    )


class ClassificationModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = make_kinetics_resnet()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        # The model expects a video tensor of shape (B, C, T, H, W), which is
        # the format provided by the dataset.
        y_hat = self.model(batch["video"])
        # Compute cross entropy loss; loss.backward will be called behind the
        # scenes by PyTorch Lightning after being returned from this method.
        loss = F.cross_entropy(y_hat, batch["label"])
        # Log the train loss to TensorBoard.
        self.log("train_loss", loss.item())
        return loss

    def validation_step(self, batch, batch_idx):
        y_hat = self.model(batch["video"])
        loss = F.cross_entropy(y_hat, batch["label"])
        self.log("val_loss", loss)
        return loss

    def configure_optimizers(self):
        """
        Set up the Adam optimizer. Note that this function can also return an
        LR scheduler, which is usually useful for training video models.
        """
        return torch.optim.Adam(self.parameters(), lr=1e-1)


classification_module = ClassificationModule()
data_module = KineticsDataModule()
trainer = pl.Trainer()
trainer.fit(classification_module, datamodule=data_module)
```
The full log:

```
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
| Name | Type | Params
-------------------------------
0 | model | Net | 31.7 M
-------------------------------
31.7 M Trainable params
0 Non-trainable params
31.7 M Total params
126.646 Total estimated model params size (MB)
Validation sanity check: 0%
0/2 [00:00<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-7-a7ab4758bd42> in <module>
2 data_module = KineticsDataModule()
3 trainer = pl.Trainer()
----> 4 trainer.fit(classification_module, datamodule=data_module)
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
497
498 # dispath `start_training` or `start_testing` or `start_predicting`
--> 499 self.dispatch()
500
501 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
544
545 else:
--> 546 self.accelerator.start_training(self)
547
548 def train_or_test_or_predict(self):
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
71
72 def start_training(self, trainer):
---> 73 self.training_type_plugin.start_training(trainer)
74
75 def start_testing(self, trainer):
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
112 def start_training(self, trainer: 'Trainer') -> None:
113 # double dispatch to initiate the training loop
--> 114 self._results = trainer.run_train()
115
116 def start_testing(self, trainer: 'Trainer') -> None:
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
605 self.progress_bar_callback.disable()
606
--> 607 self.run_sanity_check(self.lightning_module)
608
609 # set stage for logging
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in run_sanity_check(self, ref_model)
862
863 # run eval step
--> 864 _, eval_results = self.run_evaluation(max_batches=self.num_sanity_val_batches)
865
866 self.on_sanity_check_end()
~/opt/anaconda3/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py in run_evaluation(self, max_batches, on_epoch)
711 dl_max_batches = self.evaluation_loop.max_batches[dataloader_idx]
712
--> 713 for batch_idx, batch in enumerate(dataloader):
714 if batch is None:
715 continue
~/opt/anaconda3/lib/python3.8/site-packages/torch/utils/data/dataloader.py in __iter__(self)
353 return self._iterator
354 else:
--> 355 return self._get_iterator()
356
357 @property
~/opt/anaconda3/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _get_iterator(self)
299 else:
300 self.check_worker_number_rationality()
--> 301 return _MultiProcessingDataLoaderIter(self)
302
303 @property
~/opt/anaconda3/lib/python3.8/site-packages/torch/utils/data/dataloader.py in __init__(self, loader)
912 # before it starts, and __del__ tries to join but will get:
913 # AssertionError: can only join a started process.
--> 914 w.start()
915 self._index_queues.append(index_queue)
916 self._workers.append(w)
~/opt/anaconda3/lib/python3.8/multiprocessing/process.py in start(self)
119 'daemonic processes are not allowed to have children'
120 _cleanup()
--> 121 self._popen = self._Popen(self)
122 self._sentinel = self._popen.sentinel
123 # Avoid a refcycle if the target function holds an indirect
~/opt/anaconda3/lib/python3.8/multiprocessing/context.py in _Popen(process_obj)
222 @staticmethod
223 def _Popen(process_obj):
--> 224 return _default_context.get_context().Process._Popen(process_obj)
225
226 class DefaultContext(BaseContext):
~/opt/anaconda3/lib/python3.8/multiprocessing/context.py in _Popen(process_obj)
282 def _Popen(process_obj):
283 from .popen_spawn_posix import Popen
--> 284 return Popen(process_obj)
285
286 class ForkServerProcess(process.BaseProcess):
~/opt/anaconda3/lib/python3.8/multiprocessing/popen_spawn_posix.py in __init__(self, process_obj)
30 def __init__(self, process_obj):
31 self._fds = []
---> 32 super().__init__(process_obj)
33
34 def duplicate_for_child(self, fd):
~/opt/anaconda3/lib/python3.8/multiprocessing/popen_fork.py in __init__(self, process_obj)
17 self.returncode = None
18 self.finalizer = None
---> 19 self._launch(process_obj)
20
21 def duplicate_for_child(self, fd):
~/opt/anaconda3/lib/python3.8/multiprocessing/popen_spawn_posix.py in _launch(self, process_obj)
45 try:
46 reduction.dump(prep_data, fp)
---> 47 reduction.dump(process_obj, fp)
48 finally:
49 set_spawning_popen(None)
~/opt/anaconda3/lib/python3.8/multiprocessing/reduction.py in dump(obj, file, protocol)
58 def dump(obj, file, protocol=None):
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
62 #
TypeError: cannot pickle 'torch._C.Generator' object
```
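The traceback shows why this happens: with `num_workers > 0`, the `DataLoader` starts worker processes via the `spawn` start method (the default on macOS and Windows, visible in the `popen_spawn_posix` frames above), which requires pickling the dataset. The `Kinetics` dataset's default random video sampler appears to hold a `torch.Generator`, which is consistent with the `video_sampler` fix mentioned in the comments below, and generator objects cannot be pickled. A minimal repro of the underlying limitation, independent of pytorchvideo:

```python
import pickle

import torch

# torch.Generator wraps a C++ RNG state (torch._C.Generator) that has no
# pickle support, so any object graph containing one fails to serialize.
gen = torch.Generator()
pickle.dumps(gen)  # raises: TypeError: cannot pickle 'torch._C.Generator' object
```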
Ah yea, this issue still exists on Mac (CPU-only) with num_workers > 0. Should this be documented somewhere if there are no plans to fix it?
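Per the comment above, the simplest workaround on a CPU-only Mac is to load data in the main process, so nothing has to be pickled. A minimal sketch, changing only the `DataLoader` arguments from the snippet above:

```python
return torch.utils.data.DataLoader(
    train_dataset,
    batch_size=8,
    num_workers=0,  # single-process loading: no worker spawn, no dataset pickling
)
```

This trades away parallel data loading, which can bottleneck video training.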
Hi, I got this error when doing distributed multiprocessing with PyTorch. The fix that solved the issue for me is adding a distributed video_sampler to the datasets (both train and val).
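A sketch of that fix, assuming a distributed setup where `torch.distributed` is initialized before the dataset is built (e.g. under Lightning's DDP); `DistributedSampler` does not hold a `torch.Generator`, so the dataset pickles cleanly:

```python
def train_dataloader(self):
    train_dataset = pytorchvideo.data.Kinetics(
        data_path=VIDEO_PATH + "/train",
        clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
        # A sampler class without an unpicklable torch.Generator; it assumes
        # torch.distributed has been initialized when the dataset is created.
        video_sampler=torch.utils.data.distributed.DistributedSampler,
        transform=self.transform,
    )
    return torch.utils.data.DataLoader(train_dataset, batch_size=8, num_workers=8)
```

The same `video_sampler` argument goes on the validation dataset.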