import os
import torch
import numpy as np
import spacy
from spacy.tokens import Span
from spacy.attrs import ENT_IOB, ENT_TYPE
from spacy import displacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"inference.py -> DEVICE : {device}")
# pipeline() accepts a device string ('cuda' / 'mps' / 'cpu') in recent
# transformers versions, so reuse the device detected above instead of
# only checking for CUDA.
summarizer = pipeline(
    "summarization",
    "pszemraj/long-t5-tglobal-base-16384-book-summary",
    device=device,
)
long_text = "Here is a lot of text I don't want to read. Replace me"
# [ Practice ]
# result = summarizer(long_text)
# print(result[0]["summary_text"])
tokenizer = AutoTokenizer.from_pretrained("allenai/tk-instruct-base-def-pos")
model = AutoModelForSeq2SeqLM.from_pretrained("allenai/tk-instruct-base-def-pos")
# k = pipeline("text2text-generation", model="allenai/tk-instruct-3b-def")
# [ Practice ]
# input_ids = tokenizer.encode("Definition: return the currency of the given country. Now complete the following example - Input: India. Output:",
#                              return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)  # model should output 'Indian Rupee'
# print(output)
# input_ids = tokenizer.encode("Definition: negate the following sentence. Input: John went to school. Output:",
#                              return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)  # model should output 'John did not go to school.'
# print(output)
# text = "Alphabet's results also missed forecasts on revenue and earnings per share, as advertising declined year-over-year. The numbers come after the company laid off about 12,000 employees in January, a move CEO Sundar Pichai blamed on Alphabet overhiring during the pandemic boom. \
#         Q: Why did Alphabet's stock go down?"
# input_ids = tokenizer.encode(text, return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)
# print(output)
def Tk_instruct(text, questions):
    # Track whether summarization was applied before the model call
    summarized = False
    summarized_data = ""
    text = text + "\n\nQ: " + questions
    print("Model's input : ", text)
    # Character-length heuristic: summarize long inputs first
    if len(text) >= 512:
        print(f"===================== Apply Summarization : length = {len(text)} =====================")
        text = summarizer(text)[0]["summary_text"]
        print(f"===================== Summary text : {text} =====================")
        summarized = True
        summarized_data = text
    input_ids = tokenizer.encode(text, return_tensors="pt")
    output = model.generate(input_ids, max_length=10)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    if summarized:
        output = "Summary News : " + summarized_data + "\n\n" + "Answer : " + output
    return output
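# [ Usage sketch ] A hypothetical call to Tk_instruct; the article string is
# only a placeholder, not real data.
# article = "Replace me with a news article."
# print(Tk_instruct(article, "What is the article about?"))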
# NER practice
def practice1():
    title = "1. Finding named entities"
    print(f"======================={ title }=======================")
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
    print(doc)
    print(doc.ents)
    for ent in doc.ents:
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
| title = "2. Accessing entity annotations and labels" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp("San Francisco considers banning sidewalk delivery robots") | |
| # document level | |
| ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents] | |
| print(ents) | |
| # I - Token is inside an entity. | |
| # O - Token is outside an entity. | |
| # B - Token is the beginning of an entity. | |
| # token level | |
| ent_san = [doc[0].text, doc[0].ent_iob_, doc[0].ent_type_] | |
| ent_francisco = [doc[1].text, doc[1].ent_iob_, doc[1].ent_type_] | |
| print(ent_san) | |
| print(ent_francisco) | |
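    # Expected output (per the spaCy docs example):
    # ['San', 'B', 'GPE']
    # ['Francisco', 'I', 'GPE']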
| title = "3. Setting entity annotations" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp("fb is hiring a new vice president of global policy") | |
| ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents] | |
| print('Before', ents) | |
| # The model didn't recognize "fb" as an entity :( | |
| # Create a span for the new entity | |
| fb_ent = Span(doc, 0, 1, label="ORG"); print(fb_ent) | |
| orig_ents = list(doc.ents) | |
| # Option 1: Modify the provided entity spans, leaving the rest unmodified | |
| doc.set_ents([fb_ent], default="unmodified") | |
| # Option 2: Assign a complete list of ents to doc.ents | |
| doc.ents = orig_ents + [fb_ent] | |
| ents = [(e.text, e.start, e.end, e.label_) for e in doc.ents] | |
| print('After', ents) | |
| # [('fb', 0, 1, 'ORG')] | |
| title = "4. Setting entity annotations from array" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp.make_doc("London is a big city in the United Kingdom.") | |
| print("Before", doc.ents) # [] | |
| header = [ENT_IOB, ENT_TYPE]; print(header) | |
| attr_array = np.zeros((len(doc), len(header)), dtype="uint64"); print(attr_array) | |
| attr_array[0, 0] = 3 # B | |
| attr_array[0, 1] = doc.vocab.strings["GPE"] | |
| doc.from_array(header, attr_array); print(attr_array) | |
| print("After", doc.ents) # [London] | |
| title = "5. Visualizing named entities" | |
| print(f"======================={ title }=======================") | |
| text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously." | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp(text) | |
| # displacy.serve(doc, style="ent") | |
| displacy.serve(doc, port=3, style="ent") | |
############################################################################
# Practice building news.html from news_analysis.html + ner.html
from flask import Flask, jsonify, request, render_template
from bs4 import BeautifulSoup
app = Flask(__name__)
def practice2():
    title = "1. Rendering HTML"
    print(f"======================={ title }=======================")
    nlp = spacy.load("en_core_web_sm")
    doc1 = nlp("This is a sentence.")
    doc2 = nlp("This is another sentence.")
    # (style="ent" would render entity highlights instead of dependency arcs)
    ner_html = displacy.render([doc1, doc2], style="dep", page=True)
    print("ner_html : ", ner_html)
    # Keep only the <figure> blocks from the rendered page
    soup = BeautifulSoup(ner_html, 'html.parser')
    ner_figure_list = soup.select('figure')
    ner_html = ""
    for i in range(len(ner_figure_list)):
        ner_html = ner_html + str(ner_figure_list[i])
    with open("./templates/news_analysis.html", 'r') as f:
        html = f.read()
    idx = html.find("ner-box") + 9  # insertion point for the NER html
    html = html[:idx] + ner_html + html[idx:]
    # 'w' truncates and is write-only; no seek() is needed on a fresh file
    with open("./modules/templates/example.html", 'w') as f2:
        f2.write(html)
    return render_template("example.html")
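# [ Sketch ] Hypothetical route wiring, not in the original file. Since
# practice2() already returns render_template("example.html"), it could be
# exposed through the (currently unused) Flask app like this:
# @app.route("/news")
# def news():
#     return practice2()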
if __name__ == "__main__":
    # app.run(host='0.0.0.0', port='777')
    practice1()