1
0
Fork 0

Initial very slow search implementation

main
Ambrose Chua 2020-11-29 09:16:41 +08:00
parent 70f92f299e
commit e8236d3189
5 changed files with 144 additions and 25 deletions

14
app.go
View File

@ -77,8 +77,18 @@ func (app Datetime) index(w http.ResponseWriter, req *http.Request) {
request := Request{}
if req.URL.Path != "/" {
request, err = ParseRequest(req.URL)
if errors.Is(err, ErrComponentsMismatch) {
l.Debug("not matching components", zap.Error(err))
app.error(HTTPError{http.StatusNotFound, err}, w, req)
return
}
if errors.Is(err, ErrInvalidTime) {
l.Debug("not matching components", zap.Error(err))
app.error(HTTPError{http.StatusNotFound, err}, w, req)
return
}
if err != nil {
l.Debug("parse failed", zap.Error(err))
l.Info("parse failed", zap.Error(err))
app.error(HTTPError{http.StatusBadRequest, err}, w, req)
return
}
@ -119,7 +129,7 @@ func (app Datetime) search(w http.ResponseWriter, req *http.Request) {
return
}
l.Debug("rendering template", zap.Reflect("search", search))
//l.Debug("rendering template", zap.Reflect("search", search))
err = tmpl.Execute(w, appSearch{app, search})
if err != nil {
l.Error("templating failed", zap.Error(err))

View File

@ -1,11 +1,81 @@
package main
import (
"sort"
"strings"
"github.com/hbollon/go-edlib"
"github.com/serverwentdown/datetime.link/data"
)
// FullSearchCities uses a very basic iterative method to search for cities
// matching the given string. Every city in cities is rated against zone with
// compareCity, and the highest-rated cities are returned, best match first.
//
// At most 10 cities are returned; fewer are returned when cities holds fewer
// than 10 entries. Cities with equal ratings come back in an unspecified
// order, because map iteration order is random and sort.Slice is not stable.
// The first error reported by compareCity aborts the search and is returned.
func FullSearchCities(cities map[string]*data.City, zone string) ([]*data.City, error) {
	// TODO: optimisations
	const maxResults = 10

	ratings := make([]cityRatings, 0, len(cities))
	for _, city := range cities {
		rating, err := compareCity(city, zone)
		if err != nil {
			return nil, err
		}
		ratings = append(ratings, cityRatings{city, rating})
	}
	sort.Slice(ratings, func(i, j int) bool { return ratings[i].Rating > ratings[j].Rating })

	// Clamp the result size so a small data set does not index past the end
	// of ratings.
	count := maxResults
	if len(ratings) < count {
		count = len(ratings)
	}
	topCities := make([]*data.City, count)
	for i := range topCities {
		topCities[i] = ratings[i].City
		//l.Debug("city", zap.String("n", topCities[i].Name), zap.Float32("r", ratings[i].Rating))
	}
	return topCities, nil
}
// cityRatings pairs a city with the rating computed for it during a search.
type cityRatings struct {
	// City is the candidate city that was rated.
	City *data.City
	// Rating is the score returned by compareCity for City; FullSearchCities
	// sorts on it in descending order.
	Rating float32
}
// compareCity rates how well zone matches city. The rating is the largest of
// the scores returned by compare for the city name (full weight), each
// alternate name, the Admin1 name and the country name (each weighted by 0.9).
// The first error returned by compare is passed back with a zero rating.
func compareCity(city *data.City, zone string) (float32, error) {
	// The city's own name counts at full weight.
	best, err := compare(city.Name, zone)
	if err != nil {
		return 0, err
	}

	// Alternate names are slightly discounted against the primary name.
	for _, alt := range city.AlternateNames {
		altScore, err := compare(alt, zone)
		if err != nil {
			return 0, err
		}
		best = floatMax(best, altScore*0.9)
	}

	// The Admin1 name is the next preference.
	admin1Score, err := compare(city.Admin1.Name, zone)
	if err != nil {
		return 0, err
	}

	// The country name comes after that.
	countryScore, err := compare(city.Country.Name, zone)
	if err != nil {
		return 0, err
	}

	// Combine the three scores by keeping the strongest one.
	return floatMax(best, admin1Score*0.9, countryScore*0.9), nil
}
// compare lowercases both strings, asks edlib for their Jaro-Winkler
// similarity, and returns that similarity cubed together with any error from
// edlib. The cubed value is returned even when the error is non-nil.
// NOTE(review): the cubing presumably widens the gap between strong and weak
// matches — confirm with the author.
func compare(str1, str2 string) (float32, error) {
	left := strings.ToLower(str1)
	right := strings.ToLower(str2)

	// Alternative algorithm that was tried: edlib.Levenshtein
	similarity, err := edlib.StringsSimilarity(left, right, edlib.JaroWinkler)

	cubed := similarity * similarity * similarity
	return cubed, err
}
// floatMax returns the largest value among a and every element of bs. With no
// extra arguments it returns a unchanged.
func floatMax(a float32, bs ...float32) float32 {
	largest := a
	for i := 0; i < len(bs); i++ {
		candidate := bs[i]
		// Negated comparison (rather than <=) so that a NaN on either side
		// replaces the running value with the candidate.
		if !(largest > candidate) {
			largest = candidate
		}
	}
	return largest
}

45
search_test.go Normal file
View File

@ -0,0 +1,45 @@
package main
import (
"log"
"testing"
"github.com/hbollon/go-edlib"
"github.com/serverwentdown/datetime.link/data"
)
// TestEditDistance is a smoke test for the edlib dependency: it computes the
// Jaro-Winkler similarity of "Singapore" and "Sing" and logs the result. It
// fails if edlib reports an error.
func TestEditDistance(t *testing.T) {
	res, err := edlib.StringsSimilarity("Singapore", "Sing", edlib.JaroWinkler)
	if err != nil {
		// A bare return here would report the test as passed.
		t.Fatalf("StringsSimilarity failed: %v", err)
	}
	// The log package is used on purpose: it is this file's only use of the
	// "log" import, which would otherwise become an unused-import error.
	log.Printf("%f", res)
}
// BenchmarkCompare measures a single compare call between a long unrelated
// string and a short query. The benchmark fails if compare returns an error,
// so a failing call is never measured silently.
func BenchmarkCompare(b *testing.B) {
	for i := 0; i < b.N; i++ {
		if _, err := compare("Random String That Is Quite Long", "Singapore"); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkCompareCity measures rating one city (the "Singapore-SG" entry of
// the city data set) against the query "Singapore". Loading the data set and
// looking up the city happen before the timer is reset, so only compareCity
// is timed.
func BenchmarkCompareCity(b *testing.B) {
	cities, err := data.ReadCities()
	if err != nil {
		b.Fatalf("reading cities: %v", err)
	}
	// Fail with a clear message instead of a nil dereference inside
	// compareCity when the entry is absent.
	city := cities["Singapore-SG"]
	if city == nil {
		b.Fatal(`city "Singapore-SG" not found in data set`)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := compareCity(city, "Singapore"); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkFullSearchCities measures a full search of the city data set for
// the query "Singapore". Loading the data set happens before the timer is
// reset. The benchmark fails if loading or searching returns an error.
func BenchmarkFullSearchCities(b *testing.B) {
	cities, err := data.ReadCities()
	if err != nil {
		b.Fatalf("reading cities: %v", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := FullSearchCities(cities, "Singapore"); err != nil {
			b.Fatal(err)
		}
	}
}

20
url.go
View File

@ -8,11 +8,8 @@ import (
"time"
)
// ErrMissingComponent is thrown when the URL has empty or missing components
var ErrMissingComponent = errors.New("missing path component")
// ErrTooManyComponent is thrown when there are more than 2 components
var ErrTooManyComponent = errors.New("too many path components")
// ErrComponentsMismatch is thrown when the URL has empty, missing, or too many path components
var ErrComponentsMismatch = errors.New("missing or too many path components")
// ErrInvalidTime is thrown in a time.ParseError
var ErrInvalidTime = errors.New("invalid ISO 8601 time")
@ -31,18 +28,15 @@ func ParseRequest(u *url.URL) (Request, error) {
var err error
parts := strings.Split(u.Path, "/")[1:]
if len(parts) > 2 {
return Request{}, ErrTooManyComponent
}
if len(parts) < 1 {
return Request{}, ErrMissingComponent
if len(parts) > 2 || len(parts) < 1 {
return Request{}, ErrComponentsMismatch
}
// Parse time portion
var t time.Time
timeString := parts[0]
if len(timeString) == 0 {
return Request{}, ErrMissingComponent
return Request{}, ErrComponentsMismatch
}
if timeString == "now" {
t = time.Now()
@ -60,12 +54,12 @@ func ParseRequest(u *url.URL) (Request, error) {
// Split zones
var z []string
zoneString := ""
zoneString := "local"
if len(parts) >= 2 {
zoneString = parts[1]
}
if len(zoneString) == 0 {
return Request{}, ErrMissingComponent
return Request{}, ErrComponentsMismatch
}
z = strings.Split(zoneString, ",")

View File

@ -50,26 +50,26 @@ func TestURLParse(t *testing.T) {
func TestURLParseFail(t *testing.T) {
u := mustURLParse("http://test/2002-08-30T14:00+06:00/")
_, err := ParseRequest(u)
if !errors.Is(err, ErrMissingComponent) {
t.Errorf("got error %v, want error %v", err, ErrMissingComponent)
if !errors.Is(err, ErrComponentsMismatch) {
t.Errorf("got error %v, want error %v", err, ErrComponentsMismatch)
}
u = mustURLParse("http://test/")
_, err = ParseRequest(u)
if !errors.Is(err, ErrMissingComponent) {
t.Errorf("got error %v, want error %v", err, ErrMissingComponent)
if !errors.Is(err, ErrComponentsMismatch) {
t.Errorf("got error %v, want error %v", err, ErrComponentsMismatch)
}
u = mustURLParse("http://test")
_, err = ParseRequest(u)
if !errors.Is(err, ErrMissingComponent) {
t.Errorf("got error %v, want error %v", err, ErrMissingComponent)
if !errors.Is(err, ErrComponentsMismatch) {
t.Errorf("got error %v, want error %v", err, ErrComponentsMismatch)
}
u = mustURLParse("http://test/hi/hi/hi")
_, err = ParseRequest(u)
if !errors.Is(err, ErrTooManyComponent) {
t.Errorf("got error %v, want error %v", err, ErrTooManyComponent)
if !errors.Is(err, ErrComponentsMismatch) {
t.Errorf("got error %v, want error %v", err, ErrComponentsMismatch)
}
u = mustURLParse("http://test/2000-01-13T00:00Z08:00/hi")