Posts

NLP manual

import nltk

# Download the Punkt tokenizer models required by nltk.word_tokenize.
nltk.download('punkt')


def count_lines(file_path):
    """Count the number of lines in the text file at *file_path*.

    Returns 0 (after printing a message) when the file does not exist.
    Counts lazily instead of materializing the whole file with
    readlines(), so large files are handled in constant memory.
    """
    try:
        with open(file_path, 'r') as file:
            return sum(1 for _ in file)
    except FileNotFoundError:
        print("File not found. Please check the file path.")
        return 0


def count_words(file_path):
    """Count the number of words in the text file at *file_path*.

    Uses NLTK's word_tokenize for accurate word tokenization.
    Returns 0 (after printing a message) when the file does not exist.
    """
    try:
        with open(file_path, 'r') as file:
            text = file.read()
        words = nltk.word_tokenize(text)  # tokenizes the text into words
        return len(words)
    except FileNotFoundError:
        print("File not found. Please check the file path.")
        return 0


def main():
    file_path = input("Enter the path of the text file: ")
    # Counting lines
    line_count = count_lines(file_path)
    # NOTE(review): the original source is truncated here
    # ("line_count = count_lines(file_..."); the remainder of main()
    # could not be recovered from this page and is not reproduced.

NLP

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag, ne_chunk

nltk.download('punkt')

# Practical 1: tokenization, stemming, lemmatization and chunking demo.
text = "Dinesh puri sir kiti bhari HOD ahet."

# Tokenization
tokens = word_tokenize(text)
print("Tokens:", tokens)

# Stemming
stemmer = PorterStemmer()
print("Stemmed:", [stemmer.stem(w) for w in tokens])

# Lemmatization
lemmatizer = WordNetLemmatizer()
print("Lemmatized:", [lemmatizer.lemmatize(w) for w in tokens])

# Chunking (named-entity chunking over POS-tagged tokens)
chunks = ne_chunk(pos_tag(tokens))
print("Chunks:", chunks)


# Practical 2: count lines and words in a file.
def count_lines_words(file):
    """Print the number of lines and NLTK-tokenized words in *file*.

    Line count is text.count('\\n') + 1, so an empty file still reports
    one line — this mirrors the original behavior.
    """
    with open(file, 'r') as f:
        text = f.read()
    print("Lines:", text.count('\n') + 1)
    print("Words:", len(word_tokenize(text)))


count_lines_words("nlp5.txt")


# Practical 3: count articles.
# NOTE(review): the original source is truncated here — count_articles(file)
# is cut off after "with open(fi..." and its body could not be recovered
# from this page, so it is not reproduced.