working
This commit is contained in:
parent
4cc72067e4
commit
da395105d1
1 changed file with 13 additions and 7 deletions
|
@ -9,7 +9,6 @@ fake = Faker()
|
|||
|
||||
# pip install faker pymongo
|
||||
|
||||
# TODO: figures (fake)
|
||||
# TODO: inject (fake) figures and references in content
|
||||
|
||||
# - article doi or JSON filename can be used as paper id
|
||||
|
@ -83,16 +82,23 @@ def save_sentence(body: dict, parents: [dict], sentence: str):
|
|||
target = target["sections"][p["id"]]
|
||||
target["content"] += sentence + " "
|
||||
|
||||
def transform_section(sec: dict) -> dict:
|
||||
def transform_section(sec: dict, figures: [dict], references: [dict]) -> dict:
|
||||
content = []
|
||||
if random.randint(0, 10) == 0 and len(figures) > 0:
|
||||
content += [{ "label": figures[random.randint(0, len(figures)-1)]["label"] }]
|
||||
if "content" in sec and sec["content"] != "":
|
||||
content += [sec["content"]]
|
||||
if random.randint(0, 10) == 0 and len(references) > 0:
|
||||
content += [{ "reference": random.randint(1, len(references)) }]
|
||||
|
||||
arr = []
|
||||
ks = []
|
||||
for k in sec["sections"].keys():
|
||||
ks.append(k)
|
||||
ks.sort()
|
||||
for k in ks:
|
||||
arr.append(transform_section(sec["sections"][k]))
|
||||
|
||||
content = ([sec["content"]] if "content" in sec and sec["content"] != "" else []) + arr,
|
||||
arr.append(transform_section(sec["sections"][k], figures, references))
|
||||
content += arr,
|
||||
|
||||
if "title" not in sec:
|
||||
return content
|
||||
|
@ -156,7 +162,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
|
|||
|
||||
figures = []
|
||||
for i in range(0, random.randint(3, 15)):
|
||||
figures.push({
|
||||
figures.append({
|
||||
"page": random.randint(1, 10),
|
||||
"label": "fig" + str(i),
|
||||
"caption": fake.paragraph(nb_sentences=1),
|
||||
|
@ -200,7 +206,7 @@ def json_to_paper(filename: str, jsonObj: dict) -> dict:
|
|||
parents.append({ "id": e["secId"], "title": e["title"] })
|
||||
save_sentence(body, parents, e["sentence"])
|
||||
|
||||
paper["content"] = transform_section(body)
|
||||
paper["content"] = transform_section(body, figures, references)
|
||||
return paper
|
||||
|
||||
|
||||
|
|
Reference in a new issue