# Scikit-learn components for the bag-of-words + Naive Bayes example.
# (One import per line: the fused single-line form is a SyntaxError.)
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
# Sample data: (message text, label) pairs for a toy spam/ham classifier.
data = [
    ("Buy now, get cheap meds", "spam"),
    ("Hello, how are you?", "ham"),
    ("Free lottery tickets!", "spam"),
    ("Hi, I hope you are doing well", "ham"),
]

# Transpose the pairs into two parallel tuples: X holds the texts, y the labels.
X, y = zip(*data)
# Build the pipeline: CountVectorizer output feeds a MultinomialNB classifier.
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Train the model on the sample texts (X) and their labels (y).
model.fit(X, y)

# Predict the label of an unseen message and print the result.
print(model.predict(["Win a free iPhone now!"]))
# Scikit-learn components for the TF-IDF + SVM example.
# (One import per line: the fused single-line form is a SyntaxError.)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
# Sample data: documents[i] is the text whose label is labels[i].
documents = [
    "I love this movie",
    "Horrible film",
    "It was fantastic",
    "I did not like it",
]
labels = ["positive", "negative", "positive", "negative"]
# NumPy and Keras components for the LSTM text example.
# (One import per line: the fused single-line form is a SyntaxError.)
# NOTE(review): keras.preprocessing.text / keras.preprocessing.sequence are
# deprecated in newer Keras releases — confirm they exist in the installed
# version before relying on these import paths.
import numpy as np
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
# Sample data: short review texts; data[i] is labeled by labels[i].
data = ["Great product", "Bad quality", "I love it", "Not what I expected"]
labels = [1, 0, 1, 0]  # 1 = positive, 0 = negative
# Text processing: fit the tokenizer's vocabulary on the sample texts, encode
# each text as a sequence of integer word indices, then pad the sequences so
# they all share one length.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
X = tokenizer.texts_to_sequences(data)
X = pad_sequences(X)