31 lines
793 B
Markdown
31 lines
793 B
Markdown
|
# How to generate the DBLP CSVs
|
||
|
|
||
|
```shell
|
||
|
# Download and unpack the DBLP XML dump.
# -f: fail on HTTP errors instead of saving the error page as the dump;
# -L: follow redirects.
curl -fL -o dblp.xml.gz https://dblp.org/xml/dblp.xml.gz
gunzip dblp.xml.gz

# download the DTD specification of the DBLP XML format
curl -fL -o dblp.dtd https://dblp.org/xml/dblp.dtd

git clone https://github.com/ThomHurks/dblp-to-csv

# Convert the XML dump into CSV files named dblp_csv_*.csv.
# The loop below expects a separate dblp_csv_<type>_header.csv for each type,
# whose ';'-separated fields look like "name:type".
dblp-to-csv/XMLToCSV.py --annotate dblp.xml dblp.dtd dblp_csv.csv \
  --relations journal:article_journal author:article_author

# Both the loop and the cp below write into csv-import/, so make sure it exists.
mkdir -p csv-import

# For each element type, build a single CSV with a plain header line:
#   1. split the header on ';' and drop everything from the first ':' of
#      each field,
#   2. join the fields back with ';' (awk 1 terminates the line with a newline),
#   3. append the data file,
#   4. rewrite every \"" (optionally wrapped in { }) to "".
for t in article; do
  tr ';' '\n' <"dblp_csv_${t}_header.csv" | sed 's/:.*//g' |
    tr '\n' ';' | awk 1 | cat - "dblp_csv_${t}.csv" |
    sed -E 's/\{?\\""\}?/""/g' >"csv-import/${t}.csv"
done

# Copy every dblp_csv_author_* and dblp_csv_journal_* file, plus
# dblp_csv_author.csv and dblp_csv_journal.csv, unchanged.
# Brace expansion takes a comma-separated list; a '|' here would be parsed
# as a pipe.
cp dblp_csv_{author,journal}_* dblp_csv_{author,journal}.csv csv-import/
|
||
|
```
|
||
|
|
||
|
# Archive
|
||
|
|
||
|
The `csv-import` files are distributed as a compressed archive (`csv-import.tar.gz`). To extract them, run:
|
||
|
|
||
|
```shell
|
||
|
# x = extract, z = decompress with gzip, v = list each file as it is processed,
# f = read from the named archive file.
tar -xzvf csv-import.tar.gz
|
||
|
```
|