Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ cmppg:
cmpgeos:
task=cmpgeos; $(DC_RUN)

.PHONY: scrapestrings
scrapestrings:
go run ./internal/cmprefimpl/scraper

DC_GEOS_RUN = \
docker compose \
--project-name sf-geos-$$(echo $$geos_version | sed 's/\./-/g') \
Expand Down
55 changes: 17 additions & 38 deletions internal/cmprefimpl/cmpgeos/extract_source.go
Original file line number Diff line number Diff line change
@@ -1,58 +1,37 @@
package main

import (
"bufio"
"errors"
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"sort"
"strconv"
"strings"

"github.com/peterstace/simplefeatures/geom"
)

func extractStringsFromSource(dir string) ([]string, error) {
var strs []string
if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() || strings.Contains(path, ".git") {
return nil
}
pkgs, err := parser.ParseDir(new(token.FileSet), path, nil, 0)
if err != nil {
return err
}
for _, pkg := range pkgs {
ast.Inspect(pkg, func(n ast.Node) bool {
lit, ok := n.(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
return true
}
unquoted, err := strconv.Unquote(lit.Value)
if !ok {
// Shouldn't ever happen because we've validated that it's a string literal.
panic(fmt.Sprintf("could not unquote string '%s'from ast: %v", lit.Value, err))
}
strs = append(strs, unquoted)
return true
})
}
return nil
}); err != nil {
func loadStringsFromFile(path string) ([]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()

strSet := map[string]struct{}{}
for _, s := range strs {
strSet[strings.TrimSpace(s)] = struct{}{}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
strSet[line] = struct{}{}
}
if err := scanner.Err(); err != nil {
return nil, err
}
strs = strs[:0]

var strs []string
for s := range strSet {
strs = append(strs, s)
}
Expand Down
4 changes: 2 additions & 2 deletions internal/cmprefimpl/cmpgeos/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ func main() {
if err != nil {
log.Fatalf("could not get working dir: %v", err)
}
candidates, err := extractStringsFromSource(dir)
candidates, err := loadStringsFromFile(dir + "/internal/cmprefimpl/testdata/strings.txt")
if err != nil {
log.Fatalf("could not extract strings from src: %v", err)
log.Fatalf("could not load strings from file: %v", err)
}

geoms, err := convertToGeometries(candidates)
Expand Down
59 changes: 19 additions & 40 deletions internal/cmprefimpl/cmppg/fuzz_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
package main

import (
"bufio"
"database/sql"
"fmt"
"go/ast"
"go/parser"
"go/token"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"testing"

Expand All @@ -20,7 +16,7 @@ import (

func TestFuzz(t *testing.T) {
pg := setupDB(t)
candidates := extractStringsFromSource(t)
candidates := loadStringsFromFile(t, "../testdata/strings.txt")

checkWKTParse(t, pg, candidates)
checkWKBParse(t, pg, candidates)
Expand Down Expand Up @@ -84,44 +80,27 @@ func setupDB(t *testing.T) PostGIS {
return PostGIS{db}
}

func extractStringsFromSource(t *testing.T) []string {
var strs []string
if err := filepath.Walk("../../..", func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() || strings.Contains(path, ".git") {
return nil
}
pkgs, err := parser.ParseDir(new(token.FileSet), path, nil, 0)
if err != nil {
return err
}
for _, pkg := range pkgs {
ast.Inspect(pkg, func(n ast.Node) bool {
lit, ok := n.(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
return true
}
unquoted, err := strconv.Unquote(lit.Value)
if !ok {
// Shouldn't ever happen because we've validated that it's a string literal.
panic(fmt.Sprintf("could not unquote string '%s'from ast: %v", lit.Value, err))
}
strs = append(strs, unquoted)
return true
})
}
return nil
}); err != nil {
t.Fatal(err)
func loadStringsFromFile(t *testing.T, path string) []string {
f, err := os.Open(path)
if err != nil {
t.Fatalf("could not open strings file: %v", err)
}
defer f.Close()

strSet := map[string]struct{}{}
for _, s := range strs {
strSet[strings.TrimSpace(s)] = struct{}{}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
strSet[line] = struct{}{}
}
strs = strs[:0]
if err := scanner.Err(); err != nil {
t.Fatalf("could not read strings file: %v", err)
}

var strs []string
for s := range strSet {
strs = append(strs, s)
}
Expand Down
154 changes: 154 additions & 0 deletions internal/cmprefimpl/scraper/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// scraper extracts string literals from the simplefeatures codebase and writes
// them to a file for use by the cmprefimpl tests. This decouples the test
// inputs from the unit test source code.
//
// Usage: go run ./internal/cmprefimpl/scraper
package main

import (
"bufio"
"fmt"
"go/ast"
"go/parser"
"go/token"
"log"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
)

// main regenerates internal/cmprefimpl/testdata/strings.txt.
//
// It scrapes every string literal from the Go source under the current
// working directory, drops the ones listed in
// internal/cmprefimpl/testdata/blacklist.txt, and writes the remainder to
// strings.txt (one string per line). Both paths are resolved relative to the
// working directory. Any failure terminates the program via log.Fatalf.
//
// Blacklist entries that no longer match any scraped string are reported on
// stdout as warnings (in sorted order) so that stale entries can be pruned.
func main() {
	dir, err := os.Getwd()
	if err != nil {
		log.Fatalf("could not get working dir: %v", err)
	}

	blacklist, err := loadBlacklist(filepath.Join(dir, "internal/cmprefimpl/testdata/blacklist.txt"))
	if err != nil {
		log.Fatalf("could not load blacklist: %v", err)
	}

	strs, err := extractStringsFromSource(dir)
	if err != nil {
		log.Fatalf("could not extract strings from source: %v", err)
	}

	// Single pass over the scraped strings: keep the ones that are not
	// blacklisted, and remember which blacklist entries actually matched.
	matched := make(map[string]struct{}, len(blacklist))
	var filtered []string
	for _, s := range strs {
		if _, ok := blacklist[s]; ok {
			matched[s] = struct{}{}
			continue
		}
		filtered = append(filtered, s)
	}

	// Check for stale blacklist entries (blacklisted strings not found in
	// source). Map iteration order is random, so sort before printing to keep
	// the output stable between runs.
	var stale []string
	for bl := range blacklist {
		if _, ok := matched[bl]; !ok {
			stale = append(stale, bl)
		}
	}
	sort.Strings(stale)
	for _, bl := range stale {
		fmt.Printf("WARNING: blacklisted string not found in source: %q\n", bl)
	}

	outputPath := filepath.Join(dir, "internal/cmprefimpl/testdata/strings.txt")
	if err := writeStringsToFile(outputPath, filtered); err != nil {
		log.Fatalf("could not write output: %v", err)
	}

	fmt.Printf("Wrote %d strings to %s\n", len(filtered), outputPath)
}

// loadBlacklist reads the blacklist file at path and returns its entries as a
// set.
//
// Each line is trimmed of surrounding whitespace. Blank lines and lines whose
// first non-space character is '#' are ignored; every other line becomes one
// entry. Duplicate lines collapse into a single entry.
//
// On any error (the file cannot be opened, or reading fails part-way, e.g.
// because a line exceeds the scanner's token limit) the returned map is nil,
// so callers never see a partially loaded blacklist.
func loadBlacklist(path string) (map[string]struct{}, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	blacklist := make(map[string]struct{})
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		blacklist[line] = struct{}{}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return blacklist, nil
}

// extractStringsFromSource walks the directory tree rooted at dir, parses
// every directory as Go source, and collects the value of every string
// literal found.
//
// Directories whose path contains ".git" are skipped together with everything
// beneath them (note this is a substring match, so it also covers names such
// as ".github"). Each collected string has '\n' and '\r' replaced by a space
// and is then trimmed of surrounding whitespace, so that it can be stored in a
// one-string-per-line file. The result is de-duplicated and sorted.
//
// An error is returned if the walk fails or any visited directory contains Go
// source that does not parse. The function panics if a string literal reported
// by the parser cannot be unquoted, which would indicate a parser invariant
// being broken.
func extractStringsFromSource(dir string) ([]string, error) {
	var strs []string
	if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if !info.IsDir() {
			return nil
		}
		if strings.Contains(path, ".git") {
			// Every descendant's path would contain ".git" as well, so prune
			// the whole subtree instead of visiting and ignoring each entry.
			return filepath.SkipDir
		}
		pkgs, err := parser.ParseDir(new(token.FileSet), path, nil, 0)
		if err != nil {
			return err
		}
		for _, pkg := range pkgs {
			ast.Inspect(pkg, func(n ast.Node) bool {
				lit, ok := n.(*ast.BasicLit)
				if !ok || lit.Kind != token.STRING {
					return true
				}
				unquoted, err := strconv.Unquote(lit.Value)
				if err != nil {
					// Shouldn't ever happen because we've validated that it's a string literal.
					panic(fmt.Sprintf("could not unquote string '%s' from ast: %v", lit.Value, err))
				}
				strs = append(strs, unquoted)
				return true
			})
		}
		return nil
	}); err != nil {
		return nil, err
	}

	// Remove newlines so that strings.txt can use one-string-per-line format.
	// For WKT/WKB/GeoJSON, newlines are just whitespace and don't affect parsing.
	flatten := strings.NewReplacer("\n", " ", "\r", " ")
	strSet := make(map[string]struct{}, len(strs))
	for _, s := range strs {
		strSet[strings.TrimSpace(flatten.Replace(s))] = struct{}{}
	}

	// Reuse the backing array for the de-duplicated result.
	strs = strs[:0]
	for s := range strSet {
		strs = append(strs, s)
	}
	sort.Strings(strs)
	return strs, nil
}

// writeStringsToFile creates the file at path (truncating it if it already
// exists) and writes every element of strs to it, each terminated by a single
// '\n'. It returns the first error encountered while creating, writing,
// flushing, or closing the file.
func writeStringsToFile(path string, strs []string) error {
	out, err := os.Create(path)
	if err != nil {
		return err
	}
	// Safety net for the early returns below. On the success path the file is
	// closed explicitly so that the close error can be reported; the deferred
	// call then hits an already-closed file and its result is discarded.
	defer out.Close()

	buffered := bufio.NewWriter(out)
	for i := range strs {
		if _, werr := buffered.WriteString(strs[i]); werr != nil {
			return werr
		}
		if werr := buffered.WriteByte('\n'); werr != nil {
			return werr
		}
	}

	if ferr := buffered.Flush(); ferr != nil {
		return ferr
	}
	return out.Close()
}
2 changes: 2 additions & 0 deletions internal/cmprefimpl/testdata/blacklist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Blacklisted strings (one per line).
# These strings will be excluded from strings.txt when running the scraper.
Loading