# Principal component analysis (PCA) carried out step by step with NumPy/SciPy.
# Import numpy
import numpy as np
# Import linear algebra module
from scipy import linalg as la

# Create dataset
data = np.array([[7., 4., 3.], [4., 1., 8.], [6., 3., 5.], [8., 6., 1.], [8., 5., 7.],
                 [7., 2., 9.], [5., 3., 3.], [9., 5., 8.], [7., 4., 5.], [8., 2., 2.]])

# Step 1 - calculate the covariance matrix.
# Center the data in place; the centered matrix is also what gets projected below.
data -= data.mean(axis=0)
cov = np.cov(data, rowvar=False)

# Step 2 - calculate eigenvalues and eigenvectors of the covariance matrix.
# A covariance matrix is symmetric, so use eigh: it yields real eigenvalues,
# whereas the general-purpose la.eig reports them with a complex dtype.
evals, evecs = la.eigh(cov)
print("Eigenvalues:", evals)
print("Eigenvector:", evecs)

# Step 3 - sort the eigenvalues/eigenvectors (largest first) and select components.
num_components = 2
sorted_key = np.argsort(evals)[::-1][:num_components]
evals, evecs = evals[sorted_key], evecs[:, sorted_key]
print("Sorted and Selected Eigen Values:", evals)
print("Sorted and Selected Eigen Vector:", evecs)

# Step 4 - multiply the (centered) data matrix with the selected eigenvector matrix.
principal_components = np.dot(data, evecs)
print("Principal Components:", principal_components)
# Elbow method: fit K-means for k = 1..5 and plot the within-cluster sum of squares.
# import pandas
import pandas as pd
# import matplotlib
import matplotlib.pyplot as plt
# import K-means
from sklearn.cluster import KMeans

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
wcss_list = []
# Run a loop for different value of number of cluster
for i in range(1, 6):
    # Create and fit the KMeans model
    kmeans_model = KMeans(n_clusters=i, random_state=123)
    kmeans_model.fit(data)
    # Add the WCSS or inertia of the clusters to the wcss_list
    wcss_list.append(kmeans_model.inertia_)

# Plot the inertia(WCSS) and number of clusters
plt.plot(range(1, 6), wcss_list, marker='*')
# set title of the plot
plt.title('Selecting Optimum Number of Clusters using Elbow Method')
# Set x-axis label
plt.xlabel('Number of Clusters K')
# Set y-axis label
plt.ylabel('Within-Cluster Sum of the Squares(Inertia)')
# Display plot
plt.show()
在前面的示例中,我们创建了一个包含 X 和 Y 两列的 DataFrame。我们使用 K-means 生成了聚类并计算了 WCSS。之后,我们绘制了聚类数量和惯性图。如图所示,在 k = 2 时,图形开始显著变平,因此我们会选择 2 作为最佳聚类数。
# Silhouette analysis: fit K-means for k = 2..5 and plot the silhouette score per k.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# import k-means for performing clustering
from sklearn.cluster import KMeans
# import silhouette score
from sklearn.metrics import silhouette_score

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
score_list = []
# Run a loop for different value of number of cluster
# (starts at 2: the silhouette score needs at least two clusters)
for i in range(2, 6):
    # Create and fit the KMeans model
    kmeans_model = KMeans(n_clusters=i, random_state=123)
    kmeans_model.fit(data)
    # Make predictions
    pred = kmeans_model.predict(data)
    # Calculate the Silhouette Score
    score = silhouette_score(data, pred, metric='euclidean')
    # Add the Silhouette score of the clusters to the score_list
    score_list.append(score)

# Plot the Silhouette score and number of cluster
plt.bar(range(2, 6), score_list)
# Set title of the plot
plt.title('Silhouette Score Plot')
# Set x-axis label
plt.xlabel('Number of Clusters K')
# Set y-axis label
plt.ylabel('Silhouette Scores')
# Display plot
plt.show()
在前面的示例中,我们创建了一个包含 X 和 Y 两列的 DataFrame。我们使用 K-means 在创建的 DataFrame 上生成了不同数量的聚类并计算了轮廓分数。之后,我们使用条形图绘制了聚类数量和轮廓分数。如图所示,在 k = 2 时,轮廓分数达到最高值,因此我们会选择 2 个聚类。接下来,我们进入 k-means 聚类技术。
# K-means clustering of a small two-column DataFrame, shown as a scatter plot.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# Import K-means
from sklearn.cluster import KMeans

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
# Define number of clusters
num_clusters = 2
# Create and fit the KMeans model; a fixed seed keeps the cluster labels
# (and therefore the plot colors) reproducible, as in the other K-means snippets
km = KMeans(n_clusters=num_clusters, random_state=123)
km.fit(data)
# Predict the target variable
pred = km.predict(data)
# Plot the Clusters
plt.scatter(data.X, data.Y, c=pred, marker="o", cmap="bwr_r")
# Set title of the plot
plt.title('K-Means Clustering')
# Set x-axis label
plt.xlabel('X-Axis Values')
# Set y-axis label
plt.ylabel('Y-Axis Values')
# Display the plot
plt.show()
# Hierarchical clustering: draw a dendrogram built with Ward linkage.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# Import dendrogram
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import linkage

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
# create dendrogram using ward linkage
linkage_matrix = linkage(data, method='ward')
dendrogram_plot = dendrogram(linkage_matrix)
# Set title of the plot
plt.title('Hierarchical Clustering: Dendrogram')
# Set x-axis label
plt.xlabel('Data Items')
# Set y-axis label
plt.ylabel('Distance')
# Display the plot
plt.show()
# Agglomerative (bottom-up hierarchical) clustering shown as a scatter plot.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# Import Agglomerative Clustering
from sklearn.cluster import AgglomerativeClustering

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
# Specify number of clusters
num_clusters = 2
# Create agglomerative clustering model
ac = AgglomerativeClustering(n_clusters=num_clusters, linkage='ward')
# Fit the Agglomerative Clustering model
ac.fit(data)
# Read the cluster label assigned to each row during fitting
pred = ac.labels_
# Plot the Clusters
plt.scatter(data.X, data.Y, c=pred, marker="o")
# Set title of the plot
plt.title('Agglomerative Clustering')
# Set x-axis label
plt.xlabel('X-Axis Values')
# Set y-axis label
plt.ylabel('Y-Axis Values')
# Display the plot
plt.show()
# DBSCAN (density-based) clustering on the two-moons toy dataset.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# Import DBSCAN clustering module
from sklearn.cluster import DBSCAN
# import make_moons dataset
from sklearn.datasets import make_moons

# Generate some random moon data
features, label = make_moons(n_samples=2000)
# Create DBSCAN clustering model (library default parameters)
db = DBSCAN()
# Fit the DBSCAN clustering model
db.fit(features)
# Read the cluster label assigned to each sample during fitting
pred_label = db.labels_
# Plot the Clusters
plt.scatter(features[:, 0], features[:, 1], c=pred_label, marker="o", cmap="bwr_r")
# Set title of the plot
plt.title('DBSCAN Clustering')
# Set x-axis label
plt.xlabel('X-Axis Values')
# Set y-axis label
plt.ylabel('Y-Axis Values')
# Display the plot
plt.show()
# Spectral clustering with an RBF affinity, shown as a scatter plot.
# import pandas
import pandas as pd
# import matplotlib for data visualization
import matplotlib.pyplot as plt
# Import Spectral Clustering
from sklearn.cluster import SpectralClustering

# Create a DataFrame
data = pd.DataFrame({"X": [12, 15, 18, 10, 8, 9, 12, 20],
                     "Y": [6, 16, 17, 8, 7, 6, 9, 18]})
# Specify number of clusters
num_clusters = 2
# Create Spectral Clustering model
sc = SpectralClustering(num_clusters, affinity='rbf', n_init=100, assign_labels='discretize')
# Fit the Spectral Clustering model
sc.fit(data)
# Read the cluster label assigned to each row during fitting
pred = sc.labels_
# Plot the Clusters
plt.scatter(data.X, data.Y, c=pred, marker="o")
# Set title of the plot
plt.title('Spectral Clustering')
# Set x-axis label
plt.xlabel('X-Axis Values')
# Set y-axis label
plt.ylabel('Y-Axis Values')
# Display the plot
plt.show()
让我们使用 k-means 聚类创建一个聚类模型,并使用 Python 中的内部和外部评估指标评估性能,使用 Pima Indian Diabetes 数据集:
# Load the Pima Indian Diabetes dataset from a local CSV file.
# Import libraries
import pandas as pd

# read the dataset
diabetes = pd.read_csv("diabetes.csv")
# Show top 5-records (displayed automatically when run as a notebook cell)
diabetes.head()
首先,我们需要导入 pandas 并读取数据集。在前面的例子中,我们读取的是 Pima Indian Diabetes 数据集:
# partition data into training and testing set
from sklearn.model_selection import train_test_split

# split dataset in two parts: feature set and target label
feature_set = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
features = diabetes[feature_set]
target = diabetes.label
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
feature_train, feature_test, target_train, target_test = train_test_split(
    features, target, test_size=0.3, random_state=1)
# Input text paragraph
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Converting paragraph in lowercase
print(paragraph.lower())
# Sentence Tokenization
from nltk.tokenize import sent_tokenize

paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Split the paragraph into a list of sentence strings
tokenized_sentences = sent_tokenize(paragraph)
print(tokenized_sentences)
# Sentence tokenization with spaCy's rule-based 'sentencizer' component.
# Import spacy
import spacy

# Loading english language model
# NOTE(review): the "en" shortcut and the create_pipe()/add_pipe(component)
# calls look like the spaCy 2.x API - confirm against the installed version.
nlp = spacy.load("en")
# Build the nlp pipe using 'sentencizer'
sent_pipe = nlp.create_pipe('sentencizer')
# Append the sentencizer pipe to the nlp pipeline
nlp.add_pipe(sent_pipe)
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Create nlp Object to handle linguistic annotations in a documents.
nlp_doc = nlp(paragraph)
# Generate list of tokenized sentence
tokenized_sentences = [sentence.text for sentence in nlp_doc.sents]
print(tokenized_sentences)
# Word tokenization with spaCy.
# Import spacy
import spacy

# Loading english language model
nlp = spacy.load("en")
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Create nlp Object to handle linguistic annotations in a documents.
my_doc = nlp(paragraph)
# tokenize paragraph into words
tokenized_words = [token.text for token in my_doc]
print(tokenized_words)
# Import frequency distribution
from nltk.probability import FreqDist

# Find frequency distribution of paragraph
# (tokenized_words is the word list produced by the tokenization step)
fdist = FreqDist(tokenized_words)
# Check top 5 common words (displayed automatically when run as a notebook cell)
fdist.most_common(5)
让我们使用 matplotlib 创建一个频率分布图:
# Import matplotlib
import matplotlib.pyplot as plt

# Plot Frequency Distribution (counts per word, not cumulative)
fdist.plot(20, cumulative=False)
plt.show()
# import the nltk stopwords
from nltk.corpus import stopwords

# Load english stopwords list (a set gives constant-time membership tests)
stopwords_set = set(stopwords.words("english"))
# Removing stopwords from text: keep only the words that are not stopwords
filtered_word_list = [word for word in tokenized_words if word not in stopwords_set]
# print tokenized words
print("Tokenized Word List:", tokenized_words)
# print filtered words
print("Filtered Word List:", filtered_word_list)
# Stopword removal with spaCy's per-token is_stop flag.
# Import spacy
import spacy

# Loading english language model
nlp = spacy.load("en")
# text paragraph
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Create nlp Object to handle linguistic annotations in a documents.
my_doc = nlp(paragraph)
# Removing stopwords from text: keep the tokens not flagged as stopwords
filtered_token_list = [token for token in my_doc if not token.is_stop]
print("Filtered Word List:", filtered_token_list)
# Lemmatization with spaCy.
# Import spacy
import spacy

# Loading english language model
nlp = spacy.load("en")
# Create nlp Object to handle linguistic annotations in documents.
words = nlp("cry cries crying")
# Find lemmatized word
for w in words:
    print('Original Word: ', w.text)
    print('Lemmatized Word: ', w.lemma_)
# import Word Tokenizer and PoS Tagger
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Sample sentence
sentence = "Taj Mahal is one of the beautiful monument."
# Tokenize the sentence
sent_tokens = word_tokenize(sentence)
# Create PoS tags
sent_pos = pos_tag(sent_tokens)
# Print tokens with PoS
print(sent_pos)
# Part-of-speech tagging with spaCy.
# Import spacy
import spacy

# Loading small english language model
nlp = spacy.load("en_core_web_sm")
# Create nlp Object to handle linguistic annotations in a documents.
sentence = nlp(u"Taj Mahal is one of the beautiful monument.")
# Print every token with its part-of-speech tag
for token in sentence:
    print(token.text, token.pos_)
# Named-entity recognition with spaCy, visualized with displacy.
# Import spacy
import spacy
# Import display for visualizing the Entities
from spacy import displacy

# Load English model for tokenizer, tagger, parser, and NER
nlp = spacy.load('en')
# Sample paragraph
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Create nlp Object to handle linguistic annotations in documents.
docs = nlp(paragraph)
# Collect (entity text, entity label) pairs
entities = [(i.text, i.label_) for i in docs.ents]
print(entities)
# Visualize the entities using render function
displacy.render(docs, style="ent", jupyter=True)
# Dependency parsing with spaCy, visualized with displacy.
# Import spacy
import spacy
# Import displacy for visualizing the dependency tree
from spacy import displacy

# Load English model for tokenizer, tagger, parser, and NER
nlp = spacy.load('en')
# Raw text to parse. It is defined here because `sentence` was last rebound
# to an already-parsed document by the PoS-tagging snippet, not to a string.
sentence = "Taj Mahal is one of the beautiful monument."
# Create nlp Object to handle linguistic annotations in a documents.
docs = nlp(sentence)
# Visualize the dependency tree using render function
displacy.render(docs, style="dep", jupyter=True, options={'distance': 150})
# importing all necessary modules
from wordcloud import WordCloud
from wordcloud import STOPWORDS
import matplotlib.pyplot as plt

# Stopwords shipped with the wordcloud package, as a set
stopword_list = set(STOPWORDS)
paragraph = """Taj Mahal is one of the beautiful monuments. It is one of the wonders of the world. It was built by Shah Jahan in 1631 in memory of his third beloved wife Mumtaj Mahal."""
# Import seaborn
import seaborn as sns
import matplotlib.pyplot as plt

# Count plot of the 'feedback' column
# (df is the review DataFrame loaded in an earlier step)
sns.countplot(x='feedback', data=df)
# Set X-axis and Y-axis labels
plt.xlabel('Sentiment Score')
plt.ylabel('Number of Records')
# Show the plot using show() function
plt.show()
# Import train_test_split
from sklearn.model_selection import train_test_split
# import logistic regression scikit-learn module
from sklearn.linear_model import LogisticRegression
# Import metrics module for performance evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Partition data into training and testing set
# (count_vectors and df are built in earlier steps)
feature_train, feature_test, target_train, target_test = train_test_split(
    count_vectors, df['feedback'], test_size=0.3, random_state=1)
# Create logistic regression model object
logreg = LogisticRegression(solver='lbfgs')
# fit the model with data
logreg.fit(feature_train, target_train)
# Forecast the target variable for given test dataset
predictions = logreg.predict(feature_test)
# Assess model performance using accuracy measure
print("Logistic Regression Model Accuracy:", accuracy_score(target_test, predictions))
# Calculate model precision
print("Logistic Regression Model Precision:", precision_score(target_test, predictions))
# Calculate model recall
print("Logistic Regression Model Recall:", recall_score(target_test, predictions))
# Calculate model f1 score
print("Logistic Regression Model F1-Score:", f1_score(target_test, predictions))
在前面的代码中,我们使用 scikit-learn 的 metrics 函数评估了模型的性能,指标包括准确率、精确率、召回率和 F1 分数。所有这些指标都大于 94%,因此可以说我们的模型表现良好,能够以较高的精确率和召回率对情感类别进行准确分类。
# import logistic regression scikit-learn module
from sklearn.linear_model import LogisticRegression
# Import metrics module for performance evaluation
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# instantiate the model
logreg = LogisticRegression(solver='lbfgs')
# fit the model with data (the train/test partitions come from an earlier step)
logreg.fit(feature_train, target_train)
# Forecast the target variable for given test dataset
predictions = logreg.predict(feature_test)
# Assess model performance using accuracy measure
print("Logistic Regression Model Accuracy:", accuracy_score(target_test, predictions))
# Calculate model precision
print("Logistic Regression Model Precision:", precision_score(target_test, predictions))
# Calculate model recall
print("Logistic Regression Model Recall:", recall_score(target_test, predictions))
# Calculate model f1 score
print("Logistic Regression Model F1-Score:", f1_score(target_test, predictions))
在前面的代码中,我们使用 scikit-learn 的metrics函数,通过准确率、精确率、召回率和 F1 分数评估了模型的性能。所有指标都大于 94%,因此我们可以说我们的模型表现良好,并且能够以较好的精度和召回率对两种情感类别进行分类。在本节中,我们了解了如何通过文本分类进行情感分析。文本分类使用了 BoW 和 TF-IDF 特征。在下一节中,我们将学习如何找出两段文本之间的相似性,比如句子或段落。
文本相似度
文本相似度是确定两个最接近文本的过程。文本相似度对于查找相似文档、问题和查询非常有帮助。例如,像 Google 这样的搜索引擎使用相似度来查找文档的相关性,而像 StackOverflow 这样的问答系统或客户服务系统则使用类似问题。文本相似度通常使用两种度量标准,即 Jaccard 相似度和余弦相似度。
# Text similarity between two spaCy documents.
# Import spacy
import spacy

# Load English model for tokenizer, tagger, parser, and NER
nlp = spacy.load('en')
# Create documents
doc1 = nlp(u'I love pets.')
doc2 = nlp(u'I hate pets')
# Find similarity
print(doc1.similarity(doc2))
def jaccard_similarity(sent1, sent2):
    """Find text similarity using Jaccard similarity.

    Each sentence is split on whitespace into a set of tokens; the score is
    the size of the token intersection divided by the size of the token union.

    Args:
        sent1: First sentence (string).
        sent2: Second sentence (string).

    Returns:
        A float in [0.0, 1.0]. Two sentences with no tokens at all are
        treated as identical and score 1.0.
    """
    # Tokenize sentences
    token1 = set(sent1.split())
    token2 = set(sent2.split())
    # Intersection between tokens of two sentences
    intersection_tokens = token1 & token2
    # Union between tokens of two sentences
    union_tokens = token1 | token2
    # Both sentences empty: avoid dividing by zero
    if not union_tokens:
        return 1.0
    # Jaccard similarity
    return len(intersection_tokens) / len(union_tokens)


jaccard_similarity('I love pets.', 'I hate pets.')
# Import cv2 latest version of OpenCV library import cv2
# Import numeric python (NumPy) libraryimport numpy as np
# Import matplotlib for showing the image import matplotlib.pyplot as plt
# magic function to render the figure in a notebook
%matplotlib inline
# Read image using imread() function
image = cv2.imread('google.jpg')
# Let's check image data typeprint('Image Type:',type(image))
# Let's check dimension of image print('Image Dimension:',image.shape)
# Let's show the image
plt.imshow(image)
plt.show()
# Import cv2 latest version of OpenCV libraryimport cv2
# Import numeric python (NumPy) libraryimport numpy as np
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# Magic function to render the figure in a notebook
%matplotlib inline
# Let's create a black image
image_shape=(600,600,3)
black_image = np.zeros(shape=image_shape,dtype=np.int16)
# Show the image
plt.imshow(black_image)
# Create a white image
image_shape=(600,600,3)
white_image = np.zeros(shape=image_shape,dtype=np.int16)
# Set every pixel of the image to 255
white_image.fill(255)
# Show the image
plt.imshow(white_image)
# Let's draw a blue line on white image
line = cv2.line(white_image,(599,0),(0,599),(0,0,255),4)
# Show the image
plt.imshow(line)
让我们看一个在白色图像上绘制圆形的示例:
# Let's create a white image: every pixel of the image set to 255
img_shape = (600, 600, 3)
white_image = np.full(img_shape, 255, dtype=np.int16)
# Draw a red circle on white image (center (300, 300), radius 100, thickness 6)
circle = cv2.circle(white_image, (300, 300), 100, (255, 0, 0), 6)
# Show the image
plt.imshow(circle)
# Let's create a black image
img_shape = (600, 600, 3)
black_image = np.zeros(shape=img_shape, dtype=np.int16)
# Draw a green rectangle on black image (outline thickness 5)
rectangle = cv2.rectangle(black_image, (200, 200), (400, 500), (0, 255, 0), 5)
# Show the image
plt.imshow(rectangle)
# Let's create a black image
img_shape = (600, 600, 3)
black_image = np.zeros(shape=img_shape, dtype=np.int16)
# Draw a green filled rectangle on black image (thickness -1 fills the shape)
rectangle = cv2.rectangle(black_image, (200, 200), (400, 500), (0, 255, 0), -1)
# Show the image
plt.imshow(rectangle)
# Let's create a black image, 600 rows by 800 columns.
# The shape defined here is now the one actually used; previously a stale
# 600x600 shape from an earlier snippet was picked up instead.
img_shape = (600, 800, 3)
black_image = np.zeros(shape=img_shape, dtype=np.int16)
# Write on black image
text = cv2.putText(black_image, 'Thanksgiving', (10, 500),
                   cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 2, cv2.LINE_AA)
# Display the image
plt.imshow(text)
# Import cv2 moduleimport cv2
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# magic function to render the figure in a notebook
%matplotlib inline
# read image
image = cv2.imread('tajmahal.jpg')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Display the image
plt.imshow(rgb_image)
# Import OpenCV moduleimport cv2
# Import NumPyimport numpy as np
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# magic function to render the figure in a notebook
%matplotlib inline
# Read image
image = cv2.imread('messi.png')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Display the image
plt.imshow(rgb_image)
# Import cv2 latest version of OpenCV libraryimport cv2
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# Magic function to render the figure in a notebook
%matplotlib inline
# Read image
image = cv2.imread('nature.jpeg')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Display the image
plt.imshow(rgb_image)
# set weightage for alpha and beta
both the matrix alpha_=1 beta_=50# Add weight to the original image to change the brightness
image_change=cv2.addWeighted(rgb_image, alpha_, np.zeros(image.shape,image.dtype),0, beta_)
# Display the image
plt.imshow(image_change)
# Import OpenCV moduleimport cv2
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# Magic function to render the figure in a notebook
%matplotlib inline
# Read image
image = cv2.imread('tajmahal.jpg')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Display the image
plt.imshow(rgb_image)
# Import cv2 moduleimport cv2
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# magic function to render the figure in a notebook
%matplotlib inline
# read image
image = cv2.imread('tajmahal.jpg')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Blurring the image using Gaussian Blur
image_blur = cv2.GaussianBlur(rgb_image,(7,7),0)
# Display the image
plt.imshow(image_blur)
# Import cv2 moduleimport cv2
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# Convert image color space BGR to RGB
%matplotlib inline
# read image
image = cv2.imread('tajmahal.jpg')
# Convert image color space BGR to RGB
rgb_image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# Blurring the image using Median blurring
image_blur = cv2.medianBlur(rgb_image,11)
# Display the image
plt.imshow(image_blur)
人脸检测是一个分类问题。我们可以将图像分为两类,脸或非脸。我们需要大量的图像来训练这样的分类模型。幸运的是,OpenCV 提供了预训练的模型,如 Haar 特征级联分类器和局部二值模式(LBP)分类器,这些分类器在数千张图像上进行了训练。在我们的示例中,我们将使用 Haar 特征提取来检测人脸。让我们看看如何使用 OpenCV 在图像中捕捉人脸:
读取图像并将其转换为灰度图:
# Import cv2 latest version of OpenCV libraryimport cv2
# Import numeric python (NumPy) libraryimport numpy as np
# Import matplotlib for showing the imageimport matplotlib.pyplot as plt
# magic function to render the figure in a notebook
%matplotlib inline
# Read image
image= cv2.imread('messi.png')
# Convert image color space BGR to grayscale
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Displaying the grayscale image
plt.imshow(image_gray, cmap='gray')
# Load the haar cascade face classifier file
haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
获取图像中所有人脸的坐标:
# Get the faces coordinates for all the faces in the image
faces_coordinates = haar_cascade.detectMultiScale(image_gray, scaleFactor=1.3, minNeighbors=7)
在检测到的面部上绘制矩形:
# Draw rectangle on detected faces; each detection is unpacked as a corner
# (p, q) plus the extents (r, s) that are added to it for the opposite corner
for (p, q, r, s) in faces_coordinates:
    cv2.rectangle(image, (p, q), (p + r, q + s), (255, 255, 0), 2)
将图像颜色空间从 BGR 转换为 RGB 并显示图像:
# Convert image color space BGR to RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Display face detected image
plt.imshow(image_rgb)
在前面的示例中,我们将 BGR 图像转换为灰度图像。OpenCV 已经预先训练了面部、眼睛和微笑检测的分类器。我们可以使用预训练的面部级联分类器 XML 文件(haarcascade_frontalface_default.xml)。你可以从官方 Git 仓库获取分类器文件(haarcascade_frontalface_default.xml),或者你也可以从我们的 GitHub 仓库获取。
# Read the image
image = cv2.imread('barcelona.jpeg')
# Convert image BGR to grayscale
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Load the haar cascade face classifier file
haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# Get the faces coordinates for all the faces in the image
faces_coordinates = haar_cascade.detectMultiScale(image_gray, scaleFactor=1.3, minNeighbors=5)
# Draw rectangle on detected faces; the last two values of each detection are
# extents added to the corner (x, y), so they are named w and h
for (x, y, w, h) in faces_coordinates:
    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 0), 2)
# Convert image color space BGR to RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Display face detected the image
plt.imshow(image_rgb)