Build word-to-embedding
The helper below reads the corpus line by line and assigns every distinct character a random vector of length embedding; the "UNK" entry serves as a fallback for characters that never appear in the file.

from torch.utils.data import Dataset, DataLoader
import numpy as np


def readfile(path, embedding):
    # Read the corpus and split it into lines.
    with open(path, "r", encoding="utf-8") as file:
        all_data = file.read().split("\n")

    # Give every distinct character a random vector; "UNK" is the
    # fallback entry for anything not seen in this file.
    word_embedding = {"UNK": np.random.normal(size=(embedding,))}
    for data in all_data:
        for word in data:
            if word not in word_embedding:
                word_embedding[word] = np.random.normal(size=(embedding,))
    return all_data, word_embedding
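Every character seen in the file gets its own vector, and "UNK" covers anything that was not in it. A minimal sketch of that fallback lookup, assuming the same file and dimension used in the main block below (the sample character here is made up for illustration):

all_data, word_embedding = readfile("D:前50行.txt", 50)
vector = word_embedding.get("好", word_embedding["UNK"])  # falls back to "UNK" if unseen
print(vector.shape)  # (50,)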
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        # Each sample is one raw line of text.
        return self.data[item]
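MyDataset is the thinnest possible Dataset wrapper: __len__ reports how many lines there are, and __getitem__ hands back one raw line. A quick sanity check, assuming a small in-memory list in place of the real corpus:

demo = MyDataset(["第一行", "第二行", "第三行"])
print(len(demo))  # 3
print(demo[0])    # 第一行

# Strings are not converted to tensors, so DataLoader collates each
# batch into a plain list of strings.
loader = DataLoader(demo, batch_size=2)
for batch in loader:
    print(batch)  # ['第一行', '第二行'], then ['第三行']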
if __name__ == "__main__":
    path = "D:前50行.txt"
    embedding = 50
    all_data, word_embedding = readfile(path, embedding)

    dataset = MyDataset(all_data)
    dataloader = DataLoader(dataset)  # default batch_size=1

    # Each batch is a list of strings; every character in them
    # already has an entry in word_embedding.
    for data in dataloader:
        for words in data:
            for word in words:
                print(word_embedding[word])
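To feed these vectors to a model, the per-character vectors of one line are usually stacked into a single matrix. A minimal sketch under the same setup; the helper name line_to_matrix is my own, not from the original:

def line_to_matrix(line, word_embedding):
    # Stack each character's vector into a (len(line), dim) matrix,
    # using the "UNK" vector for characters the table has never seen.
    return np.stack([word_embedding.get(w, word_embedding["UNK"]) for w in line])

matrix = line_to_matrix("你好世界", word_embedding)
print(matrix.shape)  # (4, 50)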