Python Deep Copy vs Shallow Copy Explained
Basic Concepts of Copying
In Python, assignment operations do not create new objects, but create references to the same object. Copy operations create new objects.
Assignment vs Copy
import copy

# Assignment binds a second name to the same list object.
original = [1, 2, 3]
assigned = original
assigned[0] = 99
print(original)  # [99, 2, 3] - Original list is modified

# copy.copy() builds a new list, so the original is left alone.
original = [1, 2, 3]
copied = copy.copy(original)
copied[0] = 99
print(original)  # [1, 2, 3] - Original list is not modified
Shallow Copy
What is Shallow Copy
Shallow copy creates a new object, but does not recursively copy nested objects. Nested objects are still shared references.
Ways to Implement Shallow Copy
import copy

original = [1, 2, [3, 4]]

# 1. Use copy.copy()
shallow = copy.copy(original)

# 2. Use list's copy() method
shallow = original.copy()

# 3. Use slicing
shallow = original[:]

# 4. Use list() constructor
shallow = list(original)

# 5. Use dict's copy() method
original_dict = {'a': 1, 'b': [2, 3]}
shallow_dict = original_dict.copy()
Problems with Shallow Copy
import copy

original = [1, 2, [3, 4]]
shallow = copy.copy(original)

# Modify top-level element: only the copy's own slot is rebound.
shallow[0] = 99
print(original)  # [1, 2, [3, 4]] - Original list is not modified

# Modify nested object: the inner list is shared by both outer lists.
shallow[2][0] = 99
print(original)  # [1, 2, [99, 4]] - Original list is modified!
Deep Copy
What is Deep Copy
Deep copy creates a new object and recursively copies all nested objects. Modifying the copy does not affect the original object.
Ways to Implement Deep Copy
import copy

original = [1, 2, [3, 4]]
deep = copy.deepcopy(original)

# Modify top-level element
deep[0] = 99
print(original)  # [1, 2, [3, 4]] - Original list is not modified

# Modify nested object: the inner list was copied too, so nothing is shared.
deep[2][0] = 99
print(original)  # [1, 2, [3, 4]] - Original list is not modified
Comparison of Deep Copy and Shallow Copy
Basic Data Types
import copy

# Immutable objects (integers, strings, tuples)
a = 42
b = copy.copy(a)
c = copy.deepcopy(a)
print(a is b)  # True - Immutable objects share references
print(a is c)  # True

# Mutable objects (lists, dicts, sets)
original = [1, 2, 3]
shallow = copy.copy(original)
deep = copy.deepcopy(original)
print(original is shallow)  # False - New object created
print(original is deep)  # False - New object created
Nested Structures
import copy

original = {
    'numbers': [1, 2, 3],
    'nested': {'a': [4, 5], 'b': [6, 7]},
    'tuple': (8, 9, [10, 11]),
}

shallow = copy.copy(original)
deep = copy.deepcopy(original)

# Modify shallow copy's nested list
shallow['numbers'][0] = 99
print(original['numbers'][0])  # 99 - Original object is modified

# Modify deep copy's nested list
deep['numbers'][0] = 88
# Still 99 (the value left by the shallow-copy edit above): the deep copy's
# change to 88 does not reach the original.
print(original['numbers'][0])  # 99 - Original object is not modified by the deep copy
Copying Custom Objects
import copy


class MyClass:
    """Example class that customizes how ``copy`` duplicates it."""

    def __init__(self, value):
        self.value = value
        self.nested = [value * 2, value * 3]

    def __copy__(self):
        """Implement shallow copy: new instance, attributes shared."""
        cls = type(self)
        # Bypass __init__ so no throwaway ``nested`` list is built.
        new_obj = cls.__new__(cls)
        new_obj.value = self.value
        new_obj.nested = self.nested
        return new_obj

    def __deepcopy__(self, memo):
        """Implement deep copy: new instance, attributes copied recursively.

        ``memo`` is the id-to-copy mapping that ``copy.deepcopy`` threads
        through the whole copy operation.
        """
        cls = type(self)
        new_obj = cls.__new__(cls)
        # Register the copy before descending, so a reference back to this
        # object resolves to ``new_obj`` instead of recursing without bound.
        memo[id(self)] = new_obj
        new_obj.value = copy.deepcopy(self.value, memo)
        new_obj.nested = copy.deepcopy(self.nested, memo)
        return new_obj


original = MyClass(10)
shallow = copy.copy(original)
deep = copy.deepcopy(original)

shallow.nested[0] = 99
print(original.nested[0])  # 99 - Shallow copy shares nested object

deep.nested[0] = 88
# Still 99: the deep copy's change to 88 does not reach the original.
print(original.nested[0])  # 99 - Deep copy is independent
Practical Application Scenarios
1. Handling Configuration Objects
import copy

default_config = {
    'debug': False,
    'max_retries': 3,
    'timeout': 30,
    'endpoints': ['api1.example.com', 'api2.example.com'],
}

# Use deep copy to create independent config
config1 = copy.deepcopy(default_config)
config2 = copy.deepcopy(default_config)

config1['debug'] = True
config1['endpoints'].append('api3.example.com')

print(default_config['debug'])  # False
print(default_config['endpoints'])  # ['api1.example.com', 'api2.example.com']
2. Handling Data Structures
import copy

# Handle nested data
data = {
    'users': [
        {'name': 'Alice', 'scores': [85, 90, 78]},
        {'name': 'Bob', 'scores': [92, 88, 95]},
    ]
}

# Create copy for processing
processed_data = copy.deepcopy(data)

# Modify copy without affecting original data
for user in processed_data['users']:
    user['average'] = sum(user['scores']) / len(user['scores'])

print(processed_data['users'][0]['average'])  # 84.333...
print('average' in data['users'][0])  # False
3. Implementing Undo/Redo Functionality
import copy


class TextEditor:
    """Minimal text buffer with snapshot-based undo and redo."""

    def __init__(self):
        self.content = ""
        self.history = []     # snapshots that undo() can restore
        self.redo_stack = []  # snapshots that redo() can restore

    def write(self, text):
        """Append ``text``, first saving the current content for undo."""
        # For a str, deepcopy hands back the same immutable object; the call
        # only starts to matter if ``content`` becomes a mutable structure.
        self.history.append(copy.deepcopy(self.content))
        # A fresh edit invalidates anything that was previously undone.
        self.redo_stack.clear()
        self.content += text

    def undo(self):
        """Restore the content from before the last write; no-op if none."""
        if self.history:
            self.redo_stack.append(copy.deepcopy(self.content))
            self.content = self.history.pop()

    def redo(self):
        """Re-apply the most recently undone write; no-op if none."""
        if self.redo_stack:
            self.history.append(copy.deepcopy(self.content))
            self.content = self.redo_stack.pop()

    def get_content(self):
        """Return the current content."""
        return self.content


editor = TextEditor()
editor.write("Hello ")
editor.write("World!")
print(editor.get_content())  # Hello World!

editor.undo()
print(editor.get_content())  # Hello
4. Caching Data
import copy


class DataCache:
    """In-memory cache whose stored values are isolated from its callers."""

    def __init__(self):
        self.cache = {}

    def get(self, key):
        """Return a deep copy of the value stored under ``key``, or None."""
        if key in self.cache:
            # Hand out a copy so callers cannot mutate the cached value.
            return copy.deepcopy(self.cache[key])
        return None

    def set(self, key, value):
        """Store a deep copy of ``value`` under ``key``."""
        # Copy on the way in as well: otherwise the caller still holds a
        # reference to the stored object and can change the cache by
        # mutating it after this call.
        self.cache[key] = copy.deepcopy(value)


cache = DataCache()
data = {'items': [1, 2, 3]}
cache.set('data', data)

# Get copy of cached data
cached_data = cache.get('data')
cached_data['items'].append(4)

# Original cached data is not modified
original_data = cache.get('data')
print(original_data['items'])  # [1, 2, 3]
Performance Considerations
Performance Overhead of Deep Copy
import copy
import time

# Large data structure
large_data = {'items': list(range(10000))}

# perf_counter() is the clock intended for measuring short intervals;
# time.time() is wall-clock time and can jump if the system clock changes.

# Shallow copy
start = time.perf_counter()
shallow = copy.copy(large_data)
print(f"Shallow copy time: {time.perf_counter() - start:.6f} seconds")

# Deep copy
start = time.perf_counter()
deep = copy.deepcopy(large_data)
print(f"Deep copy time: {time.perf_counter() - start:.6f} seconds")
Choosing the Right Copy Method
import copy

# Simple data structure - Use shallow copy
simple_data = [1, 2, 3, 4, 5]
shallow_copy = copy.copy(simple_data)

# Nested data structure - Use deep copy
complex_data = [1, 2, [3, 4], {'a': 5}]
deep_copy = copy.deepcopy(complex_data)

# Read-only data - No need to copy
read_only_data = (1, 2, 3)  # Tuples are immutable
Common Issues and Solutions
1. Circular References
import copy

# Create circular reference
a = [1, 2]
b = [3, 4]
a.append(b)
b.append(a)

# Deep copy handles circular references
try:
    deep_copy = copy.deepcopy(a)
except RecursionError:
    print("Cannot handle circular reference")
else:
    print("Deep copy successfully handled circular reference")
2. Copying Custom Objects
import copy


class Node:
    """Singly linked list node with a custom deep copy."""

    def __init__(self, value):
        self.value = value
        self.next = None

    def __deepcopy__(self, memo):
        """Return a deep copy of this node and every node reachable from it.

        ``memo`` is the id-to-copy mapping that ``copy.deepcopy`` threads
        through the whole copy operation.

        Note: the chain is followed recursively, so a very long list can
        exceed the interpreter's recursion limit.
        """
        new_node = Node(None)
        # Register before descending so a cycle back to this node resolves
        # to ``new_node`` instead of recursing forever.
        memo[id(self)] = new_node
        # Copy the payload too; otherwise a mutable value would be shared
        # between the original list and its "deep" copy.
        new_node.value = copy.deepcopy(self.value, memo)
        if self.next is not None:
            new_node.next = copy.deepcopy(self.next, memo)
        return new_node


# Create linked list
node1 = Node(1)
node2 = Node(2)
node3 = Node(3)
node1.next = node2
node2.next = node3

# Deep copy linked list
copied_list = copy.deepcopy(node1)
print(copied_list.value)  # 1
print(copied_list.next.value)  # 2
3. Copying Immutable Objects
import copy

# Immutable objects don't need copying
immutable = (1, 2, 3)
shallow = copy.copy(immutable)
deep = copy.deepcopy(immutable)

print(immutable is shallow)  # True
print(immutable is deep)  # True
Best Practices
1. Clarify Copy Requirements
import copy

# Need to independently modify nested objects - Use deep copy
data = {'config': {'timeout': 30}}
independent_copy = copy.deepcopy(data)

# Only need to modify top-level objects - Use shallow copy
data = [1, 2, 3, 4, 5]
shallow_copy = copy.copy(data)
2. Avoid Unnecessary Copying
import copy


# Bad practice - Unnecessary copying
# (Named separately so it does not get silently replaced by the definition
# below and both variants stay callable for comparison.)
def process_data_with_copy(data):
    """Sum ``data`` after deep-copying it, although it is only read."""
    copied = copy.deepcopy(data)
    return sum(copied)


# Good practice - Use original data directly
def process_data(data):
    """Return the sum of ``data`` without copying it."""
    return sum(data)
3. Use Context Managers
import copy
from contextlib import contextmanager


@contextmanager
def copy_context(data, deep=False):
    """Create copy context.

    Yields a copy of ``data`` for use inside the ``with`` block: a deep copy
    when ``deep`` is true, otherwise a shallow copy.
    """
    copied = copy.deepcopy(data) if deep else copy.copy(data)
    yield copied


# Use context manager
original = [1, 2, [3, 4]]

with copy_context(original, deep=True) as copied:
    copied[2][0] = 99

print(original)  # [1, 2, [3, 4]] - Original data is not modified
4. Document Copy Behavior
import copy


class DataProcessor:
    """Data processing class

    Note: The process_data method modifies the input data.
    To preserve the original data, create a copy using copy.deepcopy()
    before calling.
    """

    def process_data(self, data):
        """Overwrite the first element of ``data`` with 99 and return ``data``.

        The input is changed in place and the same object is returned.
        """
        data[0] = 99
        return data


# Usage example
processor = DataProcessor()
original = [1, 2, 3]
# Pass a copy so ``original`` keeps its values.
processed = processor.process_data(copy.deepcopy(original))
Summary
Key differences between deep copy and shallow copy:
Shallow Copy
- Creates new object, but nested objects share references
- Use copy.copy() or the object's copy() method
- Suitable for simple data structures or when not modifying nested objects
- Lower performance overhead
Deep Copy
- Creates new object, recursively copies all nested objects
- Use copy.deepcopy()
- Suitable for complex nested data structures
- Higher performance overhead
Selection Recommendations
- Simple data structures: Use shallow copy
- Nested data structures: Use deep copy
- Read-only data: No need to copy
- Performance sensitive: Avoid unnecessary copying
- Custom objects: Implement __copy__ and __deepcopy__ methods
Understanding the difference between deep copy and shallow copy enables proper handling of data copying and avoids unexpected data modification issues.