Increase back population threshold, add population figure
parent
45ee517838
commit
f61a33d681
10
Makefile
10
Makefile
|
@ -25,7 +25,7 @@ test:
|
||||||
|
|
||||||
|
|
||||||
DATASETS = \
|
DATASETS = \
|
||||||
data/cities5000.txt \
|
data/cities15000.txt \
|
||||||
data/admin1CodesASCII.txt \
|
data/admin1CodesASCII.txt \
|
||||||
data/countryInfo.txt
|
data/countryInfo.txt
|
||||||
|
|
||||||
|
@ -35,11 +35,11 @@ data: js/data.json
|
||||||
js/data.json: $(DATASETS) scripts/data.go
|
js/data.json: $(DATASETS) scripts/data.go
|
||||||
cd scripts && $(GO) run data.go
|
cd scripts && $(GO) run data.go
|
||||||
|
|
||||||
data/cities5000.txt:
|
data/cities15000.txt:
|
||||||
$(MKDIR) data/
|
$(MKDIR) data/
|
||||||
$(DOWNLOAD) data/cities5000.zip http://download.geonames.org/export/dump/cities5000.zip
|
$(DOWNLOAD) data/cities15000.zip http://download.geonames.org/export/dump/cities15000.zip
|
||||||
$(UNZIP) data/ data/cities5000.zip
|
$(UNZIP) data/ data/cities15000.zip
|
||||||
$(RM) data/cities5000.zip
|
$(RM) data/cities15000.zip
|
||||||
|
|
||||||
data/countryInfo.txt:
|
data/countryInfo.txt:
|
||||||
$(MKDIR) data/
|
$(MKDIR) data/
|
||||||
|
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/hbollon/go-edlib"
|
"github.com/hbollon/go-edlib"
|
||||||
|
@ -42,6 +43,8 @@ type City struct {
|
||||||
AlternateNames []string `json:"an"`
|
AlternateNames []string `json:"an"`
|
||||||
Timezone string `json:"t"`
|
Timezone string `json:"t"`
|
||||||
|
|
||||||
|
Population uint64 `json:"p"`
|
||||||
|
|
||||||
Admin1 Admin1 `json:"a1"`
|
Admin1 Admin1 `json:"a1"`
|
||||||
Country Country `json:"c"`
|
Country Country `json:"c"`
|
||||||
}
|
}
|
||||||
|
@ -80,7 +83,7 @@ func (p stringLengthSort) Len() int { return len(p) }
|
||||||
func (p stringLengthSort) Less(i, j int) bool { return len(p[i]) > len(p[j]) }
|
func (p stringLengthSort) Less(i, j int) bool { return len(p[i]) > len(p[j]) }
|
||||||
func (p stringLengthSort) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
func (p stringLengthSort) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||||
|
|
||||||
func limitNames(primaryName string, names []string) []string {
|
func limitNames(primaryName string, names []string) ([]string, error) {
|
||||||
sort.Sort(stringLengthSort(names))
|
sort.Sort(stringLengthSort(names))
|
||||||
r := make([]string, 0, len(names))
|
r := make([]string, 0, len(names))
|
||||||
for _, n := range names {
|
for _, n := range names {
|
||||||
|
@ -94,7 +97,7 @@ func limitNames(primaryName string, names []string) []string {
|
||||||
// Skip almost the same names
|
// Skip almost the same names
|
||||||
res, err := edlib.FuzzySearchThreshold(n, r, 0.82, edlib.Levenshtein)
|
res, err := edlib.FuzzySearchThreshold(n, r, 0.82, edlib.Levenshtein)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Error doing fuzzy search: %v", err)
|
return nil, err
|
||||||
}
|
}
|
||||||
if len(res) != 0 {
|
if len(res) != 0 {
|
||||||
continue
|
continue
|
||||||
|
@ -115,7 +118,7 @@ func limitNames(primaryName string, names []string) []string {
|
||||||
}
|
}
|
||||||
r = append(r, n)
|
r = append(r, n)
|
||||||
}
|
}
|
||||||
return r
|
return r, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func extendRef(refs ...string) string {
|
func extendRef(refs ...string) string {
|
||||||
|
@ -192,9 +195,16 @@ func readCities(f string, countries map[string]Country, admin1s map[string]Admin
|
||||||
}
|
}
|
||||||
name := record[1]
|
name := record[1]
|
||||||
ref := normalizeName(record[2])
|
ref := normalizeName(record[2])
|
||||||
alternateNames := limitNames(name, splitNames(record[3]))
|
alternateNames, err := limitNames(name, splitNames(record[3]))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
admin1Code := record[10]
|
admin1Code := record[10]
|
||||||
countryRef := record[8]
|
countryRef := record[8]
|
||||||
|
population, err := strconv.ParseUint(record[14], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
timezone := record[17]
|
timezone := record[17]
|
||||||
|
|
||||||
// Resolve Country and Admin1
|
// Resolve Country and Admin1
|
||||||
|
@ -212,6 +222,7 @@ func readCities(f string, countries map[string]Country, admin1s map[string]Admin
|
||||||
Name: name,
|
Name: name,
|
||||||
AlternateNames: alternateNames,
|
AlternateNames: alternateNames,
|
||||||
Timezone: timezone,
|
Timezone: timezone,
|
||||||
|
Population: population,
|
||||||
Admin1: admin1,
|
Admin1: admin1,
|
||||||
Country: country,
|
Country: country,
|
||||||
}
|
}
|
||||||
|
@ -238,7 +249,7 @@ func main() {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Reading countries failed")
|
log.Fatalf("Reading countries failed")
|
||||||
}
|
}
|
||||||
cities, err := readCities("../data/cities5000.txt", countries, admin1s)
|
cities, err := readCities("../data/cities15000.txt", countries, admin1s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Reading cities failed")
|
log.Fatalf("Reading cities failed")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue