Skip to content

Commit

Permalink
(feat) use hamming distance for perception sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
leonjza committed Sep 17, 2024
1 parent fbcf989 commit 32dc9c7
Show file tree
Hide file tree
Showing 8 changed files with 234 additions and 51 deletions.
48 changes: 48 additions & 0 deletions internal/islazy/hamming.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package islazy

import (
	"encoding/hex"
	"errors"
	"math/bits"
	"strings"
)

// HammingGroup represents a hash -> group assignment used for
// in-memory Hamming distance calculations.
type HammingGroup struct {
// GroupID is the identifier of the group this hash was assigned to.
GroupID uint
// Hash holds the decoded hash bytes that other hashes are compared against.
Hash []byte
}

// HammingDistance returns the number of bit positions at which hash1 and
// hash2 differ.
//
// Both slices must have the same length; otherwise an error is returned
// together with a distance of 0. Two empty (or nil) slices have a distance
// of 0.
func HammingDistance(hash1, hash2 []byte) (int, error) {
	if len(hash1) != len(hash2) {
		return 0, errors.New("hash lengths do not match")
	}

	distance := 0
	for i, b := range hash1 {
		// XOR leaves a 1 bit wherever the two bytes disagree; the
		// population count of that byte is its share of the distance.
		distance += bits.OnesCount8(b ^ hash2[i])
	}

	return distance, nil
}

// ParsePerceptionHash decodes a perception hash of the form "p:<hex>" into
// its raw bytes.
//
// An error is returned when the "p:" prefix is missing or when the remainder
// is not valid hexadecimal; in both cases the returned slice is nil.
func ParsePerceptionHash(hashStr string) ([]byte, error) {
	const prefix = "p:"

	if !strings.HasPrefix(hashStr, prefix) {
		return nil, errors.New("invalid perception hash format: missing 'p:' prefix")
	}

	// Everything after the prefix is expected to be the hex-encoded hash.
	decoded, decodeErr := hex.DecodeString(hashStr[len(prefix):])
	if decodeErr != nil {
		return nil, decodeErr
	}

	return decoded, nil
}
23 changes: 12 additions & 11 deletions pkg/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,18 @@ const (
type Result struct {
ID uint `json:"id" gorm:"primarykey"`

URL string `json:"url"`
ProbedAt time.Time `json:"probed_at"`
FinalURL string `json:"final_url"`
ResponseCode int `json:"response_code"`
ResponseReason string `json:"response_reason"`
Protocol string `json:"protocol"`
ContentLength int64 `json:"content_length"`
HTML string `json:"html" gorm:"index"`
Title string `json:"title" gorm:"index"`
PerceptionHash string `json:"perception_hash" gorm:"index"`
Screenshot string `json:"screenshot"`
URL string `json:"url"`
ProbedAt time.Time `json:"probed_at"`
FinalURL string `json:"final_url"`
ResponseCode int `json:"response_code"`
ResponseReason string `json:"response_reason"`
Protocol string `json:"protocol"`
ContentLength int64 `json:"content_length"`
HTML string `json:"html" gorm:"index"`
Title string `json:"title" gorm:"index"`
PerceptionHash string `json:"perception_hash" gorm:"index"`
PerceptionHashGroupId uint `json:"perception_hash_group_id" gorm:"index"`
Screenshot string `json:"screenshot"`

// Name of the screenshot file
Filename string `json:"file_name"`
Expand Down
64 changes: 58 additions & 6 deletions pkg/writers/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@ package writers
import (
"sync"

"github.com/sensepost/gowitness/internal/islazy"
"github.com/sensepost/gowitness/pkg/database"
"github.com/sensepost/gowitness/pkg/models"
"gorm.io/gorm"
)

var hammingThreshold = 10

// DbWriter is a Database writer
type DbWriter struct {
URI string
conn *gorm.DB
mutex sync.Mutex
URI string
conn *gorm.DB
mutex sync.Mutex
hammingGroups []islazy.HammingGroup
}

// NewDbWriter initialises a database writer
Expand All @@ -23,9 +27,10 @@ func NewDbWriter(uri string, debug bool) (*DbWriter, error) {
}

return &DbWriter{
URI: uri,
conn: c,
mutex: sync.Mutex{},
URI: uri,
conn: c,
mutex: sync.Mutex{},
hammingGroups: []islazy.HammingGroup{},
}, nil
}

Expand All @@ -34,5 +39,52 @@ func (dw *DbWriter) Write(result *models.Result) error {
dw.mutex.Lock()
defer dw.mutex.Unlock()

// Assign Group ID based on PerceptionHash
groupID, err := dw.AssignGroupID(result.PerceptionHash)
if err != nil {
return err
}
result.PerceptionHashGroupId = groupID

return dw.conn.Create(result).Error
}

// AssignGroupID returns the perception hash group ID for perceptionHashStr.
//
// The hash is parsed from its "p:<hex>" form and compared against the groups
// cached in memory on this writer. The first cached group whose hash is
// within hammingThreshold bits is reused. Cached groups whose hash length
// differs from the incoming hash can never match and are skipped rather than
// treated as an error. If no group matches, a new group is appended to the
// cache with an ID one higher than the largest ID known to either the
// database or the cache.
//
// An error is returned when the hash cannot be parsed or when the database
// query for the current maximum group ID fails.
//
// The method reads and appends to dw.hammingGroups without taking dw.mutex
// itself; Write calls it while holding that lock.
func (dw *DbWriter) AssignGroupID(perceptionHashStr string) (uint, error) {
	// Parse the incoming perception hash
	parsedHash, err := islazy.ParsePerceptionHash(perceptionHashStr)
	if err != nil {
		return 0, err
	}

	// Iterate through existing groups to find a match, remembering the
	// highest cached ID along the way in case a new group is needed.
	var maxCachedID uint
	for _, group := range dw.hammingGroups {
		if group.GroupID > maxCachedID {
			maxCachedID = group.GroupID
		}

		// Hashes of different lengths are not comparable; treat them as
		// "no match" instead of failing the whole assignment.
		if len(group.Hash) != len(parsedHash) {
			continue
		}

		dist, err := islazy.HammingDistance(parsedHash, group.Hash)
		if err != nil {
			return 0, err
		}

		if dist <= hammingThreshold {
			return group.GroupID, nil
		}
	}

	// No matching group found; create a new group
	var maxGroupID uint
	err = dw.conn.Model(&models.Result{}).
		Select("COALESCE(MAX(perception_hash_group_id), 0)").
		Scan(&maxGroupID).Error
	if err != nil {
		return 0, err
	}

	// The cache can be ahead of the database (for example when a previous
	// insert failed after its group was cached). Use the larger of the two
	// so an ID is never handed out to two different groups.
	if maxCachedID > maxGroupID {
		maxGroupID = maxCachedID
	}
	nextGroupID := maxGroupID + 1

	// Add the new group to in-memory cache
	dw.hammingGroups = append(dw.hammingGroups, islazy.HammingGroup{
		GroupID: nextGroupID,
		Hash:    parsedHash,
	})

	return nextGroupID, nil
}
2 changes: 1 addition & 1 deletion web/api/gallery.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (h *ApiHandler) GalleryHandler(w http.ResponseWriter, r *http.Request) {
Offset(offset).Preload("Technologies")

if perceptionSort {
query.Order("perception_hash DESC")
query.Order("perception_hash_group_id DESC")
}

if len(statusCodes) > 0 {
Expand Down
21 changes: 20 additions & 1 deletion web/api/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,26 @@ func (h *ApiHandler) SearchHandler(w http.ResponseWriter, r *http.Request) {
}

searchResults = appendResults(searchResults, resultIDs, headerResults, key)
case "p":
var perceptionHashResults []models.Result
if err := h.DB.Model(&models.Result{}).
Where("perception_hash_group_id in (?)", h.DB.Model(&models.Result{}).
Select("perception_hash_group_id").Distinct("perception_hash_group_id").
Where(
"perception_hash = ?",
// p: was used as the operator trigger, but we need it
// back to resolve the group_id.
fmt.Sprintf("p:%s", value),
)).
Find(&perceptionHashResults).Error; err != nil {

log.Error("failed to get perception hash results", "err", err)
return
}

searchResults = appendResults(searchResults, resultIDs, perceptionHashResults, key)
}

}

// process any freetext if there is
Expand Down Expand Up @@ -128,7 +147,7 @@ func (h *ApiHandler) SearchHandler(w http.ResponseWriter, r *http.Request) {
// and captures any remaining free-form text.
func parseSearchQuery(query string) (map[string]string, string) {
// Operators that we know of and that will be parsed
operators := []string{"title", "tech", "header"}
operators := []string{"title", "tech", "header", "p"}
result := make(map[string]string)

var freeText string
Expand Down
4 changes: 3 additions & 1 deletion web/templates/static-report.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@
<th onclick="sortTable(1)">URL</th>
<th onclick="sortTable(2)">Title</th>
<th onclick="sortTable(3)">Code</th>
<th onclick="sortTable(4)">Failed</th>
<th onclick="sortTable(4)">Group</th>
<th onclick="sortTable(5)">Failed</th>
</tr>
</thead>
<tbody>
Expand All @@ -102,6 +103,7 @@
<td><a href="{{.URL}}" target="_blank" rel="noopener noreferrer">{{.URL}}</a></td>
<td>{{.Title}}</td>
<td class="{{statusClass .ResponseCode}}">{{.ResponseCode}}</td>
<td>{{.PerceptionHashGroupId}}</td>
<td>{{if .Failed}}Yes{{else}}{{end}}</td>
</tr>
{{end}}
Expand Down
1 change: 1 addition & 0 deletions web/ui/src/components/navigation.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const searchOperators = [
{ key: 'title', description: 'search by title' },
{ key: 'tech', description: 'search by technology' },
{ key: 'header', description: 'search by header' },
{ key: 'p', description: 'search by perception hash' },
];

const Navigation = () => {
Expand Down
Loading

0 comments on commit 32dc9c7

Please sign in to comment.