Performance Analysis Tools
1. timeit Module
The timeit module is used to measure the execution time of small code snippets.
```python
import timeit

# Measure execution time of a snippet given as a string
code = """
sum(range(1000))
"""
execution_time = timeit.timeit(code, number=1000)
print(f"Execution time: {execution_time:.4f} seconds")

# timeit.timeit also accepts a callable directly (it is not a decorator)
def test_function():
    return sum(range(1000))

print(f"Callable time: {timeit.timeit(test_function, number=1000):.4f} seconds")
```
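For noisier environments, timeit.repeat runs the measurement loop several times so the minimum can be taken; a minimal sketch:

```python
import timeit

# Repeat the timing loop 5 times and keep the best run,
# which is less sensitive to background load than a single measurement
timings = timeit.repeat("sum(range(1000))", number=1000, repeat=5)
print(f"Best of 5: {min(timings):.4f} seconds")
```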
2. cProfile Module
The cProfile module profiles a whole program, reporting per-function call counts and cumulative times, which helps locate performance bottlenecks.
```python
import cProfile

def slow_function():
    total = 0
    for i in range(1000000):
        total += i
    return total

def fast_function():
    return sum(range(1000000))

def main():
    slow_function()
    fast_function()

# Performance profiling (results printed to stdout)
cProfile.run('main()')

# Write the raw statistics to a file for later analysis
cProfile.run('main()', filename='profile_stats')
```
3. memory_profiler
memory_profiler reports the memory usage of a function, line by line, via its @profile decorator.
```python
# Install: pip install memory-profiler
from memory_profiler import profile

@profile
def memory_intensive_function():
    data = [i for i in range(1000000)]
    return sum(data)

if __name__ == '__main__':
    memory_intensive_function()
```
4. line_profiler
line_profiler is used to analyze function performance line by line.
```python
# Install: pip install line_profiler
from line_profiler import LineProfiler

def complex_function():
    result = []
    for i in range(1000):
        result.append(i * 2)
    return sum(result)

# Create performance profiler and wrap the function
lp = LineProfiler()
lp_wrapper = lp(complex_function)
lp_wrapper()

# Display per-line results
lp.print_stats()
```
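line_profiler can also be driven from the command line through kernprof; a minimal sketch of that workflow (the script name is illustrative, and the profile decorator is injected by kernprof at runtime):

```python
# script.py -- run with: kernprof -l -v script.py
# kernprof makes `profile` available as a builtin while the script runs
@profile
def complex_function():
    result = []
    for i in range(1000):
        result.append(i * 2)
    return sum(result)

if __name__ == '__main__':
    complex_function()
```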
Algorithm Optimization
1. Choose Appropriate Algorithms
```python
# Bad practice - O(n²) complexity
def find_duplicates_slow(arr):
    duplicates = []
    for i in range(len(arr)):
        for j in range(i + 1, len(arr)):
            if arr[i] == arr[j] and arr[i] not in duplicates:
                duplicates.append(arr[i])
    return duplicates

# Good practice - O(n) complexity
def find_duplicates_fast(arr):
    seen = set()
    duplicates = set()
    for item in arr:
        if item in seen:
            duplicates.add(item)
        else:
            seen.add(item)
    return list(duplicates)
```
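A quick timeit comparison of the two functions above makes the gap concrete (the input and repeat counts are illustrative; exact timings depend on hardware):

```python
import timeit

# Every value appears twice, so both versions do real work
data = list(range(1000)) * 2

print("O(n^2):", timeit.timeit(lambda: find_duplicates_slow(data), number=10))
print("O(n):  ", timeit.timeit(lambda: find_duplicates_fast(data), number=10))
```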
2. Use Built-in Functions
```python
# Bad practice - Manual implementation
def manual_sum(arr):
    total = 0
    for item in arr:
        total += item
    return total

# Good practice - Use built-in functions
def builtin_sum(arr):
    return sum(arr)

# Performance comparison
import timeit
print(timeit.timeit(lambda: manual_sum(range(10000)), number=100))
print(timeit.timeit(lambda: builtin_sum(range(10000)), number=100))
```
3. Avoid Unnecessary Computations
```python
# Bad practice - iterates over every ordered pair, so each distance is
# computed twice (plus the zero self-distances)
def calculate_distances(points):
    distances = []
    for i in range(len(points)):
        for j in range(len(points)):
            dx = points[j][0] - points[i][0]
            dy = points[j][1] - points[i][1]
            distances.append((dx ** 2 + dy ** 2) ** 0.5)
    return distances

# Good practice - each unordered pair is computed only once
def calculate_distances_optimized(points):
    distances = []
    for i in range(len(points)):
        for j in range(i + 1, len(points)):
            dx = points[j][0] - points[i][0]
            dy = points[j][1] - points[i][1]
            distances.append((dx ** 2 + dy ** 2) ** 0.5)
    return distances
```
Data Structure Optimization
1. Use Appropriate Data Structures
```python
# List lookup - O(n)
def find_in_list(lst, target):
    return target in lst

# Set lookup - O(1)
def find_in_set(s, target):
    return target in s

# Performance comparison
import timeit

lst = list(range(10000))
s = set(range(10000))

print("List lookup:", timeit.timeit(lambda: find_in_list(lst, 5000), number=1000))
print("Set lookup:", timeit.timeit(lambda: find_in_set(s, 5000), number=1000))
```
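When the data must stay in a sorted sequence rather than a set, the standard-library bisect module gives O(log n) membership checks; a small sketch (the function name is illustrative):

```python
import bisect

def find_in_sorted_list(sorted_lst, target):
    # bisect_left returns the insertion point; check whether the
    # element at that position is actually the target
    i = bisect.bisect_left(sorted_lst, target)
    return i < len(sorted_lst) and sorted_lst[i] == target

sorted_lst = list(range(10000))
print(find_in_sorted_list(sorted_lst, 5000))  # True
```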
2. Use Generators Instead of Lists
```python
# Bad practice - Use lists
def get_squares_list(n):
    return [i ** 2 for i in range(n)]

# Good practice - Use generators
def get_squares_generator(n):
    for i in range(n):
        yield i ** 2

# Memory usage comparison
import sys

list_obj = get_squares_list(1000000)
gen_obj = get_squares_generator(1000000)

print(f"List memory: {sys.getsizeof(list_obj)} bytes")
print(f"Generator memory: {sys.getsizeof(gen_obj)} bytes")
```
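As a follow-up to the block above, note that a generator produces values lazily and can be consumed only once:

```python
# Values are produced one at a time while sum() iterates,
# so peak memory stays roughly constant regardless of n
total = sum(get_squares_generator(1000000))
print(total)

# A second pass over the same generator object yields nothing
gen = get_squares_generator(3)
print(list(gen))  # [0, 1, 4]
print(list(gen))  # [] - already exhausted
```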
3. Use __slots__ to Reduce Memory
```python
class Person:
    def __init__(self, name, age):
        self.name = name
        self.age = age

class PersonWithSlots:
    __slots__ = ['name', 'age']

    def __init__(self, name, age):
        self.name = name
        self.age = age

# Memory comparison
import sys

p1 = Person("Alice", 25)
p2 = PersonWithSlots("Alice", 25)

# Note: sys.getsizeof(p1) excludes the per-instance __dict__,
# which is where most of the savings from __slots__ come from
print(f"Regular object: {sys.getsizeof(p1)} bytes")
print(f"Instance __dict__: {sys.getsizeof(p1.__dict__)} bytes")
print(f"With __slots__: {sys.getsizeof(p2)} bytes")
```
I/O Optimization
1. Batch Process I/O
```python
# Bad practice - Write line by line
def write_lines_slow(filename, lines):
    with open(filename, 'w') as f:
        for line in lines:
            f.write(line + '\n')

# Good practice - Batch write
def write_lines_fast(filename, lines):
    with open(filename, 'w') as f:
        f.write('\n'.join(lines))
```
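One caveat: the join-based version drops the trailing newline that the line-by-line version wrote. file.writelines with a generator expression keeps the output identical while letting the buffered file object batch the actual writes; a small sketch:

```python
def write_lines_writelines(filename, lines):
    with open(filename, 'w') as f:
        # writelines does not add newlines itself, so append them here;
        # the generator avoids building one huge intermediate string
        f.writelines(line + '\n' for line in lines)
```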
2. Use Buffering
```python
# Bad practice - No buffering (only allowed in binary mode;
# buffering=0 in text mode raises ValueError)
def read_without_buffer(filename):
    with open(filename, 'rb', buffering=0) as f:
        return f.read()

# Good practice - Use buffering
def read_with_buffer(filename):
    with open(filename, 'r', buffering=8192) as f:
        return f.read()
```
3. Asynchronous I/O
```python
# Install: pip install aiohttp
import asyncio
import aiohttp

async def fetch_url(session, url):
    async with session.get(url) as response:
        return await response.text()

async def fetch_all_urls(urls):
    # Share one session (and its connection pool) across all requests
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        return await asyncio.gather(*tasks)

urls = [
    "https://www.example.com",
    "https://www.google.com",
    "https://www.github.com",
]

# Fetch all URLs concurrently
results = asyncio.run(fetch_all_urls(urls))
```
Concurrency Optimization
1. Multiprocessing for CPU-Intensive Tasks
```python
import multiprocessing

def process_data(data_chunk):
    return sum(x ** 2 for x in data_chunk)

def parallel_processing(data, num_processes=4):
    chunk_size = len(data) // num_processes
    chunks = [data[i:i + chunk_size] for i in range(0, len(data), chunk_size)]
    with multiprocessing.Pool(processes=num_processes) as pool:
        results = pool.map(process_data, chunks)
    return sum(results)

if __name__ == '__main__':
    # The __main__ guard is required on platforms that spawn workers
    # (Windows, macOS) so child processes can re-import this module safely
    data = list(range(1000000))
    result = parallel_processing(data)
    print(result)
```
2. Multithreading for I/O-Intensive Tasks
```python
# Install: pip install requests
import threading
import requests

def download_url(url):
    response = requests.get(url)
    return len(response.content)

def parallel_download(urls):
    threads = []
    results = []

    def worker(url):
        result = download_url(url)
        results.append(result)

    for url in urls:
        thread = threading.Thread(target=worker, args=(url,))
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    return results

urls = [
    "https://www.example.com",
    "https://www.google.com",
    "https://www.github.com",
]
results = parallel_download(urls)
```
3. Use concurrent.futures
```python
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def process_item(item):
    return item ** 2

def with_thread_pool(items):
    with ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(process_item, items))
    return results

def with_process_pool(items):
    with ProcessPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(process_item, items))
    return results

if __name__ == '__main__':
    # ProcessPoolExecutor also needs the __main__ guard on spawn platforms
    items = list(range(1000))
    thread_results = with_thread_pool(items)
    process_results = with_process_pool(items)
```
Caching Optimization
1. Use functools.lru_cache
```python
from functools import lru_cache

@lru_cache(maxsize=128)
def fibonacci(n):
    if n < 2:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

# Fast calculation thanks to memoized intermediate results
print(fibonacci(100))
```
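The decorated function also exposes cache statistics, which is useful for confirming the cache is actually being hit (the exact numbers below depend on what has been called so far):

```python
# Inspect hits, misses, and current size of the cache
print(fibonacci.cache_info())
# e.g. CacheInfo(hits=98, misses=101, maxsize=128, currsize=101)

# Clear the cache if the cached values should be recomputed
fibonacci.cache_clear()
```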
2. Custom Caching
```python
class Cache:
    def __init__(self, max_size=128):
        self.cache = {}
        self.max_size = max_size

    def get(self, key):
        return self.cache.get(key)

    def set(self, key, value):
        if len(self.cache) >= self.max_size:
            # Evict the oldest entry (dicts preserve insertion order)
            self.cache.pop(next(iter(self.cache)))
        self.cache[key] = value

cache = Cache()

def expensive_computation(x):
    cached_result = cache.get(x)
    if cached_result is not None:
        return cached_result
    result = sum(i ** 2 for i in range(x))
    cache.set(x, result)
    return result
```
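The class above evicts entries in insertion (FIFO) order. If true least-recently-used eviction is needed without functools.lru_cache, an OrderedDict-based sketch (the class name is illustrative):

```python
from collections import OrderedDict

class LRUCache:
    def __init__(self, max_size=128):
        self.cache = OrderedDict()
        self.max_size = max_size

    def get(self, key):
        if key not in self.cache:
            return None
        # Mark as most recently used
        self.cache.move_to_end(key)
        return self.cache[key]

    def set(self, key, value):
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.max_size:
            # Drop the least recently used entry
            self.cache.popitem(last=False)
```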
3. Use Redis Cache
```python
# Install: pip install redis (requires a running Redis server)
import redis
import pickle

# Connect to Redis
r = redis.Redis(host='localhost', port=6379, db=0)

def cache_result(key, value, ttl=3600):
    """Cache a result with a time-to-live in seconds."""
    r.setex(key, ttl, pickle.dumps(value))

def get_cached_result(key):
    """Return the cached value, or None on a cache miss."""
    result = r.get(key)
    if result:
        return pickle.loads(result)
    return None

def expensive_operation(data):
    cache_key = f"result:{hash(str(data))}"

    # Try to get from cache
    cached = get_cached_result(cache_key)
    if cached is not None:
        return cached

    # Execute computation (complex_computation is a placeholder for your own logic)
    result = complex_computation(data)

    # Cache result
    cache_result(cache_key, result)
    return result
```
String Optimization
1. Use join Instead of +
```python
# Bad practice - Use +
def build_string_slow(parts):
    result = ""
    for part in parts:
        result += part
    return result

# Good practice - Use join
def build_string_fast(parts):
    return ''.join(parts)

# Performance comparison
import timeit

parts = ["part"] * 1000
print(timeit.timeit(lambda: build_string_slow(parts), number=100))
print(timeit.timeit(lambda: build_string_fast(parts), number=100))
```
2. Use String Formatting
```python
import timeit

# Bad practice - String concatenation
def format_message_slow(name, age):
    return "Name: " + name + ", Age: " + str(age)

# Good practice - Use f-string
def format_message_fast(name, age):
    return f"Name: {name}, Age: {age}"

# Performance comparison
print(timeit.timeit(lambda: format_message_slow("Alice", 25), number=10000))
print(timeit.timeit(lambda: format_message_fast("Alice", 25), number=10000))
```
3. Use String Methods
```python
import timeit

# Bad practice - Manual character-by-character processing
def process_string_slow(s):
    result = ""
    for char in s:
        if char.isupper():
            result += char.lower()
        else:
            result += char
    return result

# Good practice - Use built-in methods
def process_string_fast(s):
    return s.lower()

# Performance comparison
print(timeit.timeit(lambda: process_string_slow("HELLO"), number=10000))
print(timeit.timeit(lambda: process_string_fast("HELLO"), number=10000))
```
Database Optimization
1. Use Connection Pool
```python
from sqlalchemy import create_engine, text
from sqlalchemy.pool import QueuePool

# Create an engine backed by a connection pool
engine = create_engine(
    'postgresql://user:password@localhost/dbname',
    poolclass=QueuePool,
    pool_size=10,
    max_overflow=5
)

def execute_query(query):
    with engine.connect() as connection:
        # text() wraps a raw SQL string for execution
        result = connection.execute(text(query))
        return result.fetchall()
```
2. Batch Insert
```python
# Bad practice - Insert one row at a time (one round trip per row)
def insert_slow(cursor, items):
    for item in items:
        cursor.execute("INSERT INTO table_name VALUES (%s)", (item,))

# Good practice - Batch insert with executemany
# (%s is the psycopg2/MySQL placeholder style; sqlite3 uses ?)
def insert_fast(cursor, items):
    cursor.executemany("INSERT INTO table_name VALUES (%s)",
                       [(item,) for item in items])
```
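For a self-contained illustration, the standard-library sqlite3 module shows the same pattern (the table and column names here are made up for the example):

```python
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE items (value INTEGER)")

items = list(range(1000))

# One executemany call instead of 1000 separate execute() calls
conn.executemany("INSERT INTO items VALUES (?)", [(item,) for item in items])
conn.commit()

print(conn.execute("SELECT COUNT(*) FROM items").fetchone())  # (1000,)
conn.close()
```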
3. Use Indexes
```sql
-- Create an index
CREATE INDEX idx_name ON users(name);

-- Query that can use the index
SELECT * FROM users WHERE name = 'Alice';

-- Bad practice - wrapping the indexed column in a function forces a full
-- table scan (unless a matching functional index exists)
SELECT * FROM users WHERE LOWER(name) = 'alice';

-- Good practice - compare against the indexed column directly
SELECT * FROM users WHERE name = 'Alice';
```
Best Practices
1. Pre-allocate Memory
```python
# Bad practice - Dynamic growth with repeated append calls
def build_list_slow():
    result = []
    for i in range(10000):
        result.append(i)
    return result

# Good practice - Build the list in one pass with a comprehension
def build_list_fast():
    return [i for i in range(10000)]
```
2. Avoid Global Variables
```python
# Bad practice - Use global variables
counter = 0

def increment_global():
    global counter
    counter += 1

# Good practice - Use local variables
def increment_local(counter):
    return counter + 1
```
3. Use Appropriate Data Types
```python
# Bad practice - Use lists for numeric data
numbers = [1, 2, 3, 4, 5]

# Good practice - Use arrays
import array
numbers = array.array('i', [1, 2, 3, 4, 5])

# Bad practice - Use strings for binary data
data = "binary data"

# Good practice - Use bytes
data = b"binary data"
```
4. Lazy Loading
```python
# Bad practice - Load all data immediately
def load_all_data(large_dataset):
    data = []
    for item in large_dataset:
        processed = process_item(item)  # process_item: your per-item transformation
        data.append(processed)
    return data

# Good practice - Lazy loading with a generator
def load_data_lazy(large_dataset):
    for item in large_dataset:
        yield process_item(item)
```
Performance Monitoring
1. Use logging to Record Performance
```python
import logging
import time
from functools import wraps

logging.basicConfig(level=logging.INFO)

def logged_function(func):
    @wraps(func)  # preserve the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()  # monotonic clock suited to timing
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        logging.info(f"{func.__name__} execution time: {end_time - start_time:.4f} seconds")
        return result
    return wrapper

@logged_function
def expensive_function():
    time.sleep(1)
    return "Done"

expensive_function()
```
2. Use Performance Counters
```python
import time
from collections import defaultdict
from functools import wraps

class PerformanceMonitor:
    def __init__(self):
        self.counters = defaultdict(list)

    def record(self, name, duration):
        self.counters[name].append(duration)

    def get_stats(self, name):
        durations = self.counters[name]
        return {
            'count': len(durations),
            'total': sum(durations),
            'average': sum(durations) / len(durations),
            'min': min(durations),
            'max': max(durations),
        }

monitor = PerformanceMonitor()

def monitored_function(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        monitor.record(func.__name__, end_time - start_time)
        return result
    return wrapper
```
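A short usage example for the monitor defined above (the function and workload are illustrative):

```python
@monitored_function
def squares(n):
    return [i ** 2 for i in range(n)]

# Call the monitored function a few times to collect samples
for _ in range(5):
    squares(100000)

# Aggregated timings recorded by the decorator
print(monitor.get_stats('squares'))
```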
Summary
Key points of Python performance optimization:
- Performance Analysis Tools: timeit, cProfile, memory_profiler, line_profiler
- Algorithm Optimization: Choose appropriate algorithms, use built-in functions, avoid unnecessary computations
- Data Structure Optimization: Use appropriate data structures, use generators, use __slots__
- I/O Optimization: Batch processing, use buffering, asynchronous I/O
- Concurrency Optimization: Multiprocessing, multithreading, concurrent.futures
- Caching Optimization: lru_cache, custom caching, Redis caching
- String Optimization: Use join, string formatting, string methods
- Database Optimization: Connection pooling, batch insertion, use indexes
- Best Practices: Pre-allocate memory, avoid global variables, use appropriate data types, lazy loading
- Performance Monitoring: logging, performance counters
Performance optimization principles:
- Measure first, then optimize
- Optimize bottlenecks, not all code
- Balance readability and performance
- Use built-in functions and libraries
- Consider using C extensions or Cython
Mastering these performance optimization techniques enables you to write faster, more efficient Python programs.