hw2 (part2): done ex2 dashboard, done 1 and 2 for canvas
This commit is contained in:
parent
4a5a71cb28
commit
94c1ad3321
6 changed files with 9648 additions and 0 deletions
33
Assignment2_part2/convert.sh
Executable file
33
Assignment2_part2/convert.sh
Executable file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/bin/bash
#
# Convert data/restaurants_extended.csv (located next to this script) into
# JSON Lines: one compact JSON document per restaurant, written to
# data/restaurants_extended.jsonl.
#
# Requires: jq

set -e

# bash is required here: BASH_SOURCE is a bash array and does not exist in
# POSIX sh, so the shebang must not be /bin/sh.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)

input="$SCRIPT_DIR/data/restaurants_extended.csv"
output="$SCRIPT_DIR/data/restaurants_extended.jsonl"

# In order:
# - Slurp the whole CSV as a single raw string and split it into lines
# - Drop the first line (header) and the last element (the empty string that
#   follows the final newline)
# - Split every remaining line on "," and build one object per row
# - Emit each object on its own line (JSON Lines)
#
# Everything runs in a single jq invocation so that a jq failure aborts the
# script through `set -e` instead of being hidden behind a second pipeline
# stage.
#
# NOTE(review): rows are split on every comma, so a quoted field containing a
# comma shifts all following columns. The coordinate pair appears to be one
# quoted "[lon, lat]" column, which this split breaks into fields 8 and 9;
# the surrounding `"[` and `]"` are stripped below before number conversion.
jq --slurp --raw-input --compact-output '
  split("\n")
  | .[1:-1]
  | map(split(","))
  | map({
      "id": .[0],
      "name": .[1],
      "cityRaw": .[2],
      "city": (.[2] | split("/") | .[0]),
      "country": (.[2] | split("/") | .[1]),
      "continent": (.[2] | split("/") | .[2]),
      "location": {
        "lon": (.[8] | sub("^\"\\["; "") | sub("^\\s+"; "") | tonumber),
        "lat": (.[9] | sub("\\]\"$"; "") | sub("^\\s+"; "") | tonumber)
      },
      "averageCostForTwo": .[3],
      "aggregateRating": .[4],
      "ratingText": .[5],
      "votes": .[6],
      "date": .[7]
    })
  | .[]
' "$input" > "$output"
|
Can't render this file because it is too large.
|
9499
Assignment2_part2/data/restaurants_extended.jsonl
Normal file
9499
Assignment2_part2/data/restaurants_extended.jsonl
Normal file
File diff suppressed because it is too large
Load diff
5
Assignment2_part2/export.ndjson
Normal file
5
Assignment2_part2/export.ndjson
Normal file
File diff suppressed because one or more lines are too long
77
Assignment2_part2/mappings.json
Normal file
77
Assignment2_part2/mappings.json
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
{
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "long"
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"city": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"country": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"continent": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cityRaw": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"location": {
|
||||||
|
"type": "geo_point"
|
||||||
|
},
|
||||||
|
"averageCostForTwo": {
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"aggregateRating": {
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"ratingText": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"votes": {
|
||||||
|
"type": "float"
|
||||||
|
},
|
||||||
|
"date": {
|
||||||
|
"type": "date",
|
||||||
|
"format": "date_optional_time"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
34
Assignment2_part2/upload.sh
Executable file
34
Assignment2_part2/upload.sh
Executable file
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/bash
#
# Recreate the Elasticsearch index `restaurants_extended`, apply the field
# mappings from mappings.json and index every document found in
# data/restaurants_extended.jsonl (one JSON document per line).
#
# Environment:
#   ELASTIC_PASSWORD  password of the `elastic` user (optional override)
#
# Requires: curl, jq

set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

elastic_dir="$HOME/bin/elasticsearch-8.6.2"
elastic_url="https://localhost:9200"
crt="$elastic_dir/config/certs/http_ca.crt"

input="$SCRIPT_DIR/data/restaurants_extended.jsonl"
# Resolved against the script directory so the script works from any cwd.
mappings="$SCRIPT_DIR/mappings.json"
# NOTE(review): the fallback credential is committed to the repository;
# rotate it and pass the real one through ELASTIC_PASSWORD instead.
password="${ELASTIC_PASSWORD:-GZH*wqNTvQ0WRdrPrpHm}"

index_name="restaurants_extended"

# Upper bound on concurrently running upload requests.
max_jobs=16

# Drop any previous index; this is allowed to fail (e.g. first run), hence
# the deliberate `|| true`.
curl --cacert "$crt" -u "elastic:$password" \
  -X DELETE "$elastic_url/$index_name" | jq . || true

# Create index
curl --cacert "$crt" -u "elastic:$password" \
  -X PUT "$elastic_url/$index_name" | jq .

# Upload mappings
curl --cacert "$crt" -u "elastic:$password" -X POST \
  --data-binary @"$mappings" "$elastic_url/$index_name/_mappings/" \
  -H "Content-Type: application/json" | jq .

# Upload documents one by one, at most $max_jobs requests in flight.
# The certificate is verified against $crt like in the calls above (no -k).
running=0
while IFS= read -r line || [[ -n "$line" ]]; do
  id=$(printf '%s\n' "$line" | jq '.id | tonumber')
  # printf with a quoted argument sends the document byte-for-byte; an
  # unquoted echo would collapse whitespace and expand glob characters.
  printf '%s\n' "$line" \
    | curl --cacert "$crt" -u "elastic:$password" -X PUT \
        --data-binary @- "$elastic_url/$index_name/_doc/$id" \
        -H "Content-Type: application/json" \
    | jq ._id &
  running=$((running + 1))
  if (( running >= max_jobs )); then
    # Barrier: let the current batch finish before starting the next one.
    wait
    running=0
  fi
done < "$input"

# Do not return before the last batch of uploads has completed.
wait
|
Reference in a new issue