Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multiprocessing in face detection gallery example #7140

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@
"""
from time import time

import dask
import numpy as np
import matplotlib.pyplot as plt

from dask import delayed

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
Expand All @@ -48,7 +47,6 @@
# integral image within this ROI is computed. Finally, the integral image is
# used to extract the features.

@delayed
def extract_feature_image(img, feature_type, feature_coord=None):
"""Extract the haar feature for the current image"""
ii = integral_image(img)
Expand All @@ -69,10 +67,11 @@ def extract_feature_image(img, feature_type, feature_coord=None):

# Build a computation graph using Dask. This allows the work to be spread
# over multiple CPU cores later, when the graph is actually computed.
X = delayed(extract_feature_image(img, feature_types) for img in images)
X = [dask.delayed(extract_feature_image)(img, feature_types) for img in images]
X = dask.delayed(np.stack)(X)
# Compute the result
t_start = time()
X = np.array(X.compute(scheduler='single-threaded'))
X = X.compute(scheduler="processes")
time_full_feature_comp = time() - t_start

# Label images (100 faces and 100 non-faces)
Expand Down Expand Up @@ -139,12 +138,16 @@ def extract_feature_image(img, feature_type, feature_coord=None):
# but we would like to emphasize the usage of `feature_coord` and `feature_type`
# to recompute a subset of desired features.

# Build the computational graph using Dask
X = delayed(extract_feature_image(img, feature_type_sel, feature_coord_sel)
for img in images)
# Build a computation graph using Dask. This allows the work to be spread
# over multiple CPU cores later, when the graph is actually computed.
X = [
dask.delayed(extract_feature_image)(img, feature_type_sel, feature_coord_sel)
for img in images
]
X = dask.delayed(np.stack)(X)
# Compute the result
t_start = time()
X = np.array(X.compute(scheduler='single-threaded'))
X = X.compute(scheduler="processes")
time_subs_feature_comp = time() - t_start

y = np.array([1] * 100 + [0] * 100)
Expand Down