1
0
Fork 0

Restore the higher population threshold (cities15000) and add a population figure to each city

main
Ambrose Chua 2020-11-07 22:52:36 +08:00
parent 45ee517838
commit f61a33d681
2 changed files with 21 additions and 10 deletions

View File

@ -25,7 +25,7 @@ test:
DATASETS = \ DATASETS = \
data/cities5000.txt \ data/cities15000.txt \
data/admin1CodesASCII.txt \ data/admin1CodesASCII.txt \
data/countryInfo.txt data/countryInfo.txt
@ -35,11 +35,11 @@ data: js/data.json
js/data.json: $(DATASETS) scripts/data.go js/data.json: $(DATASETS) scripts/data.go
cd scripts && $(GO) run data.go cd scripts && $(GO) run data.go
data/cities5000.txt: data/cities15000.txt:
$(MKDIR) data/ $(MKDIR) data/
$(DOWNLOAD) data/cities5000.zip http://download.geonames.org/export/dump/cities5000.zip $(DOWNLOAD) data/cities15000.zip http://download.geonames.org/export/dump/cities15000.zip
$(UNZIP) data/ data/cities5000.zip $(UNZIP) data/ data/cities15000.zip
$(RM) data/cities5000.zip $(RM) data/cities15000.zip
data/countryInfo.txt: data/countryInfo.txt:
$(MKDIR) data/ $(MKDIR) data/

View File

@ -20,6 +20,7 @@ import (
"os" "os"
"regexp" "regexp"
"sort" "sort"
"strconv"
"strings" "strings"
"github.com/hbollon/go-edlib" "github.com/hbollon/go-edlib"
@ -42,6 +43,8 @@ type City struct {
AlternateNames []string `json:"an"` AlternateNames []string `json:"an"`
Timezone string `json:"t"` Timezone string `json:"t"`
Population uint64 `json:"p"`
Admin1 Admin1 `json:"a1"` Admin1 Admin1 `json:"a1"`
Country Country `json:"c"` Country Country `json:"c"`
} }
@ -80,7 +83,7 @@ func (p stringLengthSort) Len() int { return len(p) }
func (p stringLengthSort) Less(i, j int) bool { return len(p[i]) > len(p[j]) } func (p stringLengthSort) Less(i, j int) bool { return len(p[i]) > len(p[j]) }
func (p stringLengthSort) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p stringLengthSort) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func limitNames(primaryName string, names []string) []string { func limitNames(primaryName string, names []string) ([]string, error) {
sort.Sort(stringLengthSort(names)) sort.Sort(stringLengthSort(names))
r := make([]string, 0, len(names)) r := make([]string, 0, len(names))
for _, n := range names { for _, n := range names {
@ -94,7 +97,7 @@ func limitNames(primaryName string, names []string) []string {
// Skip almost the same names // Skip almost the same names
res, err := edlib.FuzzySearchThreshold(n, r, 0.82, edlib.Levenshtein) res, err := edlib.FuzzySearchThreshold(n, r, 0.82, edlib.Levenshtein)
if err != nil { if err != nil {
log.Fatalf("Error doing fuzzy search: %v", err) return nil, err
} }
if len(res) != 0 { if len(res) != 0 {
continue continue
@ -115,7 +118,7 @@ func limitNames(primaryName string, names []string) []string {
} }
r = append(r, n) r = append(r, n)
} }
return r return r, nil
} }
func extendRef(refs ...string) string { func extendRef(refs ...string) string {
@ -192,9 +195,16 @@ func readCities(f string, countries map[string]Country, admin1s map[string]Admin
} }
name := record[1] name := record[1]
ref := normalizeName(record[2]) ref := normalizeName(record[2])
alternateNames := limitNames(name, splitNames(record[3])) alternateNames, err := limitNames(name, splitNames(record[3]))
if err != nil {
return nil, err
}
admin1Code := record[10] admin1Code := record[10]
countryRef := record[8] countryRef := record[8]
population, err := strconv.ParseUint(record[14], 10, 64)
if err != nil {
return nil, err
}
timezone := record[17] timezone := record[17]
// Resolve Country and Admin1 // Resolve Country and Admin1
@ -212,6 +222,7 @@ func readCities(f string, countries map[string]Country, admin1s map[string]Admin
Name: name, Name: name,
AlternateNames: alternateNames, AlternateNames: alternateNames,
Timezone: timezone, Timezone: timezone,
Population: population,
Admin1: admin1, Admin1: admin1,
Country: country, Country: country,
} }
@ -238,7 +249,7 @@ func main() {
if err != nil { if err != nil {
log.Fatalf("Reading countries failed") log.Fatalf("Reading countries failed")
} }
cities, err := readCities("../data/cities5000.txt", countries, admin1s) cities, err := readCities("../data/cities15000.txt", countries, admin1s)
if err != nil { if err != nil {
log.Fatalf("Reading cities failed") log.Fatalf("Reading cities failed")
} }