import os
import torch
import numpy as np
import spacy
from spacy.tokens import Span
from spacy.attrs import ENT_IOB, ENT_TYPE
from spacy import displacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f"inference.py -> DEVICE : {device}")
# pipeline() accepts a device string ('cuda' / 'mps' / 'cpu') in recent
# transformers versions, so reuse the device detected above instead of
# only checking for CUDA.
summarizer = pipeline(
    "summarization",
    "pszemraj/long-t5-tglobal-base-16384-book-summary",
    device=device,
)
long_text = "Here is a lot of text I don't want to read. Replace me"
# [ Practice ]
# result = summarizer(long_text)
# print(result[0]["summary_text"])
tokenizer = AutoTokenizer.from_pretrained("allenai/tk-instruct-base-def-pos")
model = AutoModelForSeq2SeqLM.from_pretrained("allenai/tk-instruct-base-def-pos")
# k = pipeline("text2text-generation", model="allenai/tk-instruct-3b-def")
# [ Practice ]
# input_ids = tokenizer.encode("Definition: return the currency of the given country. Now complete the following example - Input: India. Output:",
#                              return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)  # model should output 'Indian Rupee'
# print(output)
# input_ids = tokenizer.encode("Definition: negate the following sentence. Input: John went to school. Output:",
#                              return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)  # model should output 'John did not go to school.'
# print(output)
# text = "Alphabet's results also missed forecasts on revenue and earnings per share, as advertising declined year-over-year. The numbers come after the company laid off about 12,000 employees in January, a move CEO Sundar Pichai blamed on Alphabet overhiring during the pandemic boom. \
#         Q: Why did Alphabet's stock go down?"
# input_ids = tokenizer.encode(text, return_tensors="pt")
# output = model.generate(input_ids, max_length=10)
# output = tokenizer.decode(output[0], skip_special_tokens=True)
# print(output)
def Tk_instruct(text, questions):
    # Track whether summarization was applied before the model call
    summarized = False
    summarized_data = ""
    text = text + "\n\nQ: " + questions
    print("Model's input : ", text)
    # Character-length heuristic: summarize long inputs first
    if len(text) >= 512:
        print(f"===================== Apply Summarization : length = {len(text)} =====================")
        text = summarizer(text)[0]["summary_text"]
        print(f"===================== Summary text : {text} =====================")
        summarized = True
        summarized_data = text
    input_ids = tokenizer.encode(text, return_tensors="pt")
    output = model.generate(input_ids, max_length=10)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    if summarized:
        output = "Summary News : " + summarized_data + "\n\n" + "Answer : " + output
    return output
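# [ Usage sketch ] A hypothetical call to Tk_instruct; the article string is
# only a placeholder, not real data.
# article = "Replace me with a news article."
# print(Tk_instruct(article, "What is the article about?"))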
# NER practice
def practice1():
    title = "1. Finding named entities"
    print(f"======================={ title }=======================")
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
    print(doc)
    print(doc.ents)
    for ent in doc.ents:
        print(ent.text, ent.start_char, ent.end_char, ent.label_)
| title = "2. Accessing entity annotations and labels" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp("San Francisco considers banning sidewalk delivery robots") | |
| # document level | |
| ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents] | |
| print(ents) | |
| # I - Token is inside an entity. | |
| # O - Token is outside an entity. | |
| # B - Token is the beginning of an entity. | |
| # token level | |
| ent_san = [doc[0].text, doc[0].ent_iob_, doc[0].ent_type_] | |
| ent_francisco = [doc[1].text, doc[1].ent_iob_, doc[1].ent_type_] | |
| print(ent_san) | |
| print(ent_francisco) | |
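    # Expected output (per the spaCy docs example):
    # ['San', 'B', 'GPE']
    # ['Francisco', 'I', 'GPE']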
| title = "3. Setting entity annotations" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp("fb is hiring a new vice president of global policy") | |
| ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents] | |
| print('Before', ents) | |
| # The model didn't recognize "fb" as an entity :( | |
| # Create a span for the new entity | |
| fb_ent = Span(doc, 0, 1, label="ORG"); print(fb_ent) | |
| orig_ents = list(doc.ents) | |
| # Option 1: Modify the provided entity spans, leaving the rest unmodified | |
| doc.set_ents([fb_ent], default="unmodified") | |
| # Option 2: Assign a complete list of ents to doc.ents | |
| doc.ents = orig_ents + [fb_ent] | |
| ents = [(e.text, e.start, e.end, e.label_) for e in doc.ents] | |
| print('After', ents) | |
| # [('fb', 0, 1, 'ORG')] | |
| title = "4. Setting entity annotations from array" | |
| print(f"======================={ title }=======================") | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp.make_doc("London is a big city in the United Kingdom.") | |
| print("Before", doc.ents) # [] | |
| header = [ENT_IOB, ENT_TYPE]; print(header) | |
| attr_array = np.zeros((len(doc), len(header)), dtype="uint64"); print(attr_array) | |
| attr_array[0, 0] = 3 # B | |
| attr_array[0, 1] = doc.vocab.strings["GPE"] | |
| doc.from_array(header, attr_array); print(attr_array) | |
| print("After", doc.ents) # [London] | |
| title = "5. Visualizing named entities" | |
| print(f"======================={ title }=======================") | |
| text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously." | |
| nlp = spacy.load("en_core_web_sm") | |
| doc = nlp(text) | |
| # displacy.serve(doc, style="ent") | |
| displacy.serve(doc, port=3, style="ent") | |
############################################################################
# Practice building news.html from news_analysis.html + ner.html
from flask import Flask, jsonify, request, render_template
from bs4 import BeautifulSoup
app = Flask(__name__)
def practice2():
    title = "1. Rendering HTML"
    print(f"======================={ title }=======================")
    nlp = spacy.load("en_core_web_sm")
    doc1 = nlp("This is a sentence.")
    doc2 = nlp("This is another sentence.")
    # (style="ent" would render entity highlights instead of dependency arcs)
    ner_html = displacy.render([doc1, doc2], style="dep", page=True)
    print("ner_html : ", ner_html)
    # Keep only the <figure> blocks from the rendered page
    soup = BeautifulSoup(ner_html, 'html.parser')
    ner_figure_list = soup.select('figure')
    ner_html = ""
    for i in range(len(ner_figure_list)):
        ner_html = ner_html + str(ner_figure_list[i])
    with open("./templates/news_analysis.html", 'r') as f:
        html = f.read()
    idx = html.find("ner-box") + 9  # insertion point for the NER html
    html = html[:idx] + ner_html + html[idx:]
    # 'w' truncates and is write-only; no seek() is needed on a fresh file
    with open("./modules/templates/example.html", 'w') as f2:
        f2.write(html)
    return render_template("example.html")
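# [ Sketch ] Hypothetical route wiring, not in the original file. Since
# practice2() already returns render_template("example.html"), it could be
# exposed through the (currently unused) Flask app like this:
# @app.route("/news")
# def news():
#     return practice2()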
if __name__ == "__main__":
    # app.run(host='0.0.0.0', port='777')
    practice1()