NLP Practicals
# Practical 1: Tokenization, stemming, lemmatization, and chunking
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, ne_chunk
nltk.download('punkt')
nltk.download('wordnet')                      # needed by WordNetLemmatizer
nltk.download('averaged_perceptron_tagger')   # needed by pos_tag
nltk.download('maxent_ne_chunker')            # needed by ne_chunk
nltk.download('words')                        # needed by ne_chunk
text = "Dinesh Puri sir is such a great HOD."
# Tokenization
tokens = word_tokenize(text)
print("Tokens:", tokens)
# Stemming
stemmer = PorterStemmer()
print("Stemmed:", [stemmer.stem(w) for w in tokens])
# Lemmatization
lemmatizer = WordNetLemmatizer()
print("Lemmatized:", [lemmatizer.lemmatize(w) for w in tokens])
# Chunking
chunks = ne_chunk(pos_tag(tokens))
print("Chunks:", chunks)
# Practical 2: Count lines and words in a file
from nltk.tokenize import word_tokenize
def count_lines_words(file):
    with open(file, 'r') as f:
        text = f.read()
    # splitlines() does not overcount when the file ends with a newline,
    # unlike text.count('\n') + 1
    print("Lines:", len(text.splitlines()))
    print("Words:", len(word_tokenize(text)))

count_lines_words("nlp5.txt")
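For large files it can be better not to read everything into memory. A hedged streaming variant (the name count_lines_words_streaming is just illustrative; the counts match as long as no token spans a line break):

def count_lines_words_streaming(file):
    # Sketch: process one line at a time instead of reading the whole file
    lines = words = 0
    with open(file, 'r') as f:
        for line in f:
            lines += 1
            words += len(word_tokenize(line))
    print("Lines:", lines)
    print("Words:", words)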
# Practical 3: Count articles (a, an, the) in a file
from nltk.tokenize import word_tokenize
def count_articles(file):
    with open(file, 'r') as f:
        text = f.read().lower()
    words = word_tokenize(text)
    for a in ['a', 'an', 'the']:
        print(f"{a}: {words.count(a)}")

count_articles("nlp5.txt")
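words.count(a) rescans the full token list once per article; collections.Counter gets the same counts in a single pass. A minimal sketch of a replacement loop inside count_articles:

from collections import Counter
counts = Counter(words)          # one pass over all tokens
for a in ['a', 'an', 'the']:
    print(f"{a}: {counts[a]}")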
# Practical 4: Remove stopwords from a file
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
nltk.download('stopwords')

def remove_stopwords(file):
    with open(file, 'r') as f:
        text = f.read()
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))  # build the set once; O(1) lookups
    filtered = [w for w in tokens if w not in stop_words]
    print("Filtered:", filtered)

remove_stopwords("nlp5.txt")
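The filtered list still contains punctuation tokens, since NLTK's stopword list only covers words. One hedged way to drop punctuation too (a replacement for the list comprehension inside remove_stopwords):

# Sketch: keep only alphabetic tokens that are not stopwords
filtered = [w for w in tokens if w.isalpha() and w not in stop_words]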
# Practical 5: Match regular expressions against file tokens
import re
from nltk.tokenize import word_tokenize
def apply_regex(file):
    with open(file, 'r') as f:
        text = f.read()  # keep the original case so [A-Z] has a chance to match
    words = word_tokenize(text)
    patterns = {'a*': r'a*', 'a+': r'a+', 'a?': r'a?', 'upper': r'[A-Z]', 's_S': r'[Ss]'}
    for name, pat in patterns.items():
        matched = [w for w in words if re.search(pat, w)]
        print(f"{name}: {matched}")

apply_regex("nlp5.txt")
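Note that re.search with a* or a? succeeds on every word, because both patterns can match zero characters; re.fullmatch is the stricter check. A small sketch:

print(bool(re.search(r'a*', 'xyz')))       # True: the empty match satisfies a*
print(bool(re.fullmatch(r'a+', 'aaa')))    # True: the whole word is a's
print(bool(re.fullmatch(r'a+', 'banana'))) # False: other characters present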
# Practical 6: POS tagging of a user-entered sentence
import nltk
from nltk import word_tokenize, pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

sentence = input("Enter a sentence: ")
tags = pos_tag(word_tokenize(sentence))
print("POS Tags:", tags)