Generators and iterators are fundamental Python concepts for processing data one item at a time, which keeps memory usage low even for very large or unbounded sequences.
Understanding Iterators
class Counter:
    def __init__(self, max_value):
        self.max_value = max_value
        self.current = 0

    def __iter__(self):
        # an iterator returns itself from __iter__
        return self

    def __next__(self):
        if self.current >= self.max_value:
            raise StopIteration  # tells the for loop there are no more values
        self.current += 1
        return self.current

counter = Counter(5)
for num in counter:
    print(num)  # 1, 2, 3, 4, 5
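Under the hood, a for loop calls iter() once and then next() repeatedly until StopIteration is raised. Here is a minimal sketch of that protocol done by hand, reusing the Counter class above:

counter = Counter(3)
iterator = iter(counter)   # calls counter.__iter__(), which returns the object itself
print(next(iterator))      # calls iterator.__next__() -> 1
print(next(iterator))      # 2
print(next(iterator))      # 3
# next(iterator) would now raise StopIteration, which the for loop catches for you

Because Counter returns itself from __iter__, a single instance is exhausted after one pass; create a fresh Counter for each new loop.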
Generator Functions with Yield
def fibonacci(n):
    a, b = 0, 1
    for _ in range(n):
        yield a
        a, b = b, a + b

for num in fibonacci(10):
    print(num)

# Generator maintains state
gen = fibonacci(5)
print(next(gen))  # 0
print(next(gen))  # 1
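A generator object is itself an iterator, so it can only be consumed once. A quick sketch using the fibonacci function above:

gen = fibonacci(3)
print(list(gen))  # [0, 1, 1] - collecting the values consumes the generator
print(list(gen))  # [] - the generator is exhausted; iterating again yields nothing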
Generator Expressions
# Memory efficient for large datasets
squares = (x**2 for x in range(1000000))
# Use in functions that accept iterables
total = sum(x**2 for x in range(1000))
max_value = max(x for x in range(100) if x % 7 == 0)
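To see the memory difference, compare a list comprehension with the equivalent generator expression. The exact byte counts vary by Python version and platform, so treat the numbers below as illustrative:

import sys

squares_list = [x**2 for x in range(1000000)]  # builds all one million values up front
squares_gen = (x**2 for x in range(1000000))   # produces values only as they are requested
print(sys.getsizeof(squares_list))  # several megabytes for the list object alone
print(sys.getsizeof(squares_gen))   # a couple hundred bytes, regardless of the range size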
Infinite Generators
def infinite_counter():
    count = 0
    while True:
        yield count
        count += 1

counter = infinite_counter()
print(next(counter))  # 0
print(next(counter))  # 1
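An infinite generator is normally paired with something that limits how much of it is consumed, such as itertools.islice. A small sketch using the infinite_counter above:

from itertools import islice

counter = infinite_counter()
first_five = list(islice(counter, 5))  # take only the first five values
print(first_five)  # [0, 1, 2, 3, 4]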
Pipeline Processing
def read_large_file(file_path):
    # yields one stripped line at a time instead of reading the whole file into memory
    with open(file_path) as f:
        for line in f:
            yield line.strip()

def filter_lines(lines):
    # keep only lines longer than 10 characters
    for line in lines:
        if len(line) > 10:
            yield line

def process_lines(lines):
    for line in lines:
        yield line.upper()

# Chain generators: no line is read until the final loop pulls it through the pipeline
lines = read_large_file('data.txt')
filtered = filter_lines(lines)
processed = process_lines(filtered)
for line in processed:
    print(line)
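When the steps are simple, the same pipeline can also be written with generator expressions. This sketch assumes the same data.txt file as above:

with open('data.txt') as f:
    stripped = (line.strip() for line in f)
    long_lines = (line for line in stripped if len(line) > 10)
    shouted = (line.upper() for line in long_lines)
    for line in shouted:
        print(line)  # each line is pulled through the chain one at a time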
Use generators for memory-efficient processing of large datasets!