# query.py

from myads.query import ADSQueryWrapper
import yaml
import sys
import math

# Query papers citing any of the three SWIFT ADS entries named in the query.
# NOTE(review): sys.argv[1] is presumably the ADS API token -- confirm against
# myads.query.ADSQueryWrapper.
query = ADSQueryWrapper(sys.argv[1])
query_data = query.get(
    "citations(2016pasc.conf....2S)"
    " or citations(2018ascl.soft05020S)"
    " or citations(2023arXiv230513380S)",
    fl="title,bibcode,first_author_norm,date,year,author_count,bibstem,"
       "abstract,volume,issue,page,citation_count",
    rows=1000,
)
paper_list = list(query_data.papers)
total_citations = 0

# The release paper itself may appear among the results; it is tracked here so
# it can be dropped from the cards afterwards. Both names start with defaults
# so that a result set without that paper does not end in a NameError.
to_remove = None
citations_release = 0

# Process the data to make the title a string and not an array of strings
for paper in paper_list:
    if paper.bibcode == "2023arXiv230513380S":
        to_remove = paper
        citations_release = paper.citation_count

    # Only single-element titles are unwrapped; anything else is left as is.
    if len(paper.title) == 1:
        paper.title = paper.title[0]
    paper.bibstem = paper.bibstem[0]
    # PhD theses ("PhDT" in the bibcode) keep their page field untouched.
    # NOTE(review): a float value presumably marks a missing field (NaN) --
    # confirm against what myads returns.
    if hasattr(paper, "page") and "PhDT" not in paper.bibcode:
        if not isinstance(paper.page, float):
            paper.page = paper.page[0]
    if hasattr(paper, "abstract"):
        if not isinstance(paper.abstract, float):
            paper.abstract = paper.abstract.replace("<P />", "")
    # NOTE(review): this also counts the release paper's own citations even
    # though that paper is removed from the cards below -- confirm intended.
    total_citations += paper.citation_count

# Only discount the release paper from the totals if it was actually found.
if to_remove is None:
    print("Warning: release paper 2023arXiv230513380S not found in the "
          "query results; citations_release set to 0.", file=sys.stderr)
    num_papers = query_data.num_found
else:
    paper_list.remove(to_remove)
    num_papers = query_data.num_found - 1

# Create a dictionary of cards
my_list = {
    "num_papers": num_papers,
    "total_citations": total_citations,
    "citations_release": citations_release,
    "cards": paper_list,
}

# Silence the yaml object naming: the emitter's tag-processing hook is swapped
# for a function that does nothing.
def noop(self, *args, **kw):
    """Accept any arguments, ignore them all and return None."""
    return None


yaml.emitter.Emitter.process_tag = noop

# Dump everything.
# The context manager closes the file even if yaml.dump raises, and the
# explicit UTF-8 encoding keeps the unescaped non-ASCII characters produced by
# allow_unicode=True from failing on platforms with another default encoding.
# yaml.dump returns None when it is handed a stream, so its result is not kept.
with open('data/pubs.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(my_list, f, allow_unicode=True, default_flow_style=False)