Add experimental user parser

This commit is contained in:
Zed
2022-01-16 06:00:11 +01:00
parent fcfc1ef497
commit cdf49dcddd
8 changed files with 270 additions and 29 deletions

View File

@@ -0,0 +1,67 @@
import std/[macros, htmlgen, unicode]
import ../types/common
import ".."/../[formatters, utils]
type
  ReplaceSliceKind = enum ## How the text covered by a slice is rewritten.
    rkRemove,  ## Emit nothing for the covered text.
    rkUrl,     ## Emit a link showing `display` and pointing at `url`.
    rkHashtag, ## Emit a search link built from the covered text itself.
    rkMention  ## Emit a link to `url`, titled `display`, around the text.

  ReplaceSlice* = object
    ## One pending rewrite of a span of text. The fields are private to this
    ## module; values are created and consumed by the procs defined here.
    slice: Slice[int]      # inclusive bounds, used to index a `seq[Rune]`
    kind: ReplaceSliceKind # selects the markup emitted for the span
    url: string            # link target (rkUrl, rkMention)
    display: string        # link text (rkUrl) or title (rkMention)
proc cmp*(x, y: ReplaceSlice): int =
  ## Orders two replacement slices by the offset at which they start.
  ##
  ## Returns a negative value when `x` starts before `y`, zero when both start
  ## at the same offset and a positive value otherwise.
  ##
  ## Both operands are compared on their *start* offset. Comparing the start
  ## of `x` with the end of `y` would give `cmp(x, x) != 0` for any slice
  ## longer than one element and contradictory answers for overlapping
  ## slices, which is not a valid ordering to hand to a sort.
  cmp(x.slice.a, y.slice.a)
proc dedupSlices*(s: var seq[ReplaceSlice]) =
  ## Removes entries in place until no two remaining entries share the same
  ## start offset (`slice.a`).
  ##
  ## Entries are removed with `del`, which fills the hole with the last
  ## element, so the order of the survivors is not preserved.
  var
    remaining = s.len
    anchor = 0

  while anchor < remaining:
    var probe = anchor + 1
    while probe < remaining:
      if s[anchor].slice.a != s[probe].slice.a:
        inc probe
      else:
        # `del` moved the former last element into `probe`, so the same
        # index has to be examined again instead of advancing.
        s.del probe
        dec remaining
    inc anchor
proc extractUrls*(result: var seq[ReplaceSlice]; url: Url;
                  textLen: int; hideTwitter = false) =
  ## Appends the replacement describing `url` to `result`.
  ##
  ## The covered span runs from `url.indices[0]` up to, but excluding,
  ## `url.indices[1]`.
  ##
  ## * Normally an `rkUrl` entry is added, pointing at the expanded URL and
  ##   displayed as its `shortLink`.
  ## * With `hideTwitter` set, a link for which `isTwitterUrl` is true and
  ##   whose span reaches `textLen` is hidden instead: an `rkRemove` entry is
  ##   added when the span starts inside the text, otherwise nothing is added.
  let
    target = url.expandedUrl
    span = url.indices[0] ..< url.indices[1]
    # `and` short-circuits, so `isTwitterUrl` only runs for trailing links
    # when hiding was requested.
    hidden = hideTwitter and span.b.succ >= textLen and target.isTwitterUrl

  if not hidden:
    result.add ReplaceSlice(kind: rkUrl, url: target,
                            display: target.shortLink, slice: span)
  elif span.a < textLen:
    result.add ReplaceSlice(kind: rkRemove, slice: span)
proc replacedWith*(runes: seq[Rune]; repls: openArray[ReplaceSlice];
                   textSlice: Slice[int]): string =
  ## Rebuilds the text in `runes`, substituting markup for every span listed
  ## in `repls`.
  ##
  ## Output starts at `textSlice.a` and stops before `textSlice.b`. The text
  ## between two replacements is copied unchanged; each replacement then
  ## contributes, depending on its kind:
  ##
  ## * `rkHashtag` - a link to `/search?q=%23<name>`, where the first rune of
  ##   the span is kept as the visible symbol and the rest is the name;
  ## * `rkMention` - the covered text linked to `url` with `display` as title;
  ## * `rkUrl` - `display` linked to `url`;
  ## * `rkRemove` - nothing.
  ##
  ## NOTE(review): every gap starts right after the previous entry's slice,
  ## so `repls` is presumably expected to be sorted and non-overlapping -
  ## confirm against the callers.
  template gapStart(count: int; prevIdx): int =
    # `prevIdx` is left untyped so a backwards index such as `^1` is accepted.
    if count > 0: repls[prevIdx].slice.b.succ else: textSlice.a

  result = newStringOfCap(runes.len)

  for pos, repl in repls:
    # Untouched text between the previous replacement and this one.
    let start = gapStart(pos, pos - 1)
    result.add $runes[start ..< repl.slice.a]

    case repl.kind
    of rkRemove:
      discard
    of rkUrl:
      result.add a(repl.display, href = repl.url)
    of rkMention:
      result.add a($runes[repl.slice], href = repl.url, title = repl.display)
    of rkHashtag:
      let
        name = $runes[repl.slice.a.succ .. repl.slice.b]
        symbol = $runes[repl.slice.a]
      result.add a(symbol & name, href = "/search?q=%23" & name)

  # Whatever follows the last replacement, if anything is left.
  let tail = gapStart(repls.len, ^1) ..< textSlice.b
  if tail.a <= tail.b:
    result.add $runes[tail]

View File

@@ -0,0 +1,68 @@
import std/[algorithm, unicode, re, strutils]
import jsony
import utils, slices
import ../types/user as userType
from ../../types import Profile, Error
let
  # @mention linking. Group 1 is the character in front of the `@` (or the
  # start of the text) and must not be a letter, digit or one of `-_./?`;
  # group 2 is the user name, 1 to 15 letters, digits or underscores.
  #
  # The letter ranges are spelled `A-Za-z` on purpose: the range `A-z` also
  # covers the ASCII characters between `Z` and `a` (`[`, `\`, `]`, `^`, `_`
  # and the backtick), which let brackets leak into user names and prevented
  # mentions written as `[@name]` from being linked.
  unRegex = re"(^|[^A-Za-z0-9-_./?])@([A-Za-z0-9_]{1,15})"
  unReplace = "$1<a href=\"/$2\">@$2</a>"

  # Hashtag/cashtag linking. Group 1 is the preceding character, group 2 the
  # `#` or `$` symbol and group 3 the tag; the link always searches `%23<tag>`.
  htRegex = re"(^|[^\w-_./?])([#$])([\w_]+)"
  htReplace = "$1<a href=\"/search?q=%23$3\">$2$3</a>"
proc expandProfileEntities(profile: var Profile; user: User) =
  ## Fills in the parts of `profile` that come from the user's entities.
  ##
  ## * `profile.website` becomes the expanded form of the first URL entity,
  ##   when there is one.
  ## * `profile.bio` is rewritten: every URL entity of the description is
  ##   replaced through `replacedWith`, after which @mentions and `#`/`$` tags
  ##   are linked with `unRegex` and `htRegex`.
  let
    bioRunes = profile.bio.toRunes
    entities = user.entities

  if entities.url.urls.len != 0:
    profile.website = entities.url.urls[0].expandedUrl

  var repls: seq[ReplaceSlice]
  for link in entities.description.urls:
    extractUrls(repls, link, bioRunes.high)

  # Drop entries sharing a start offset first, then order by position.
  dedupSlices(repls)
  sort(repls, cmp)

  let
    withUrls = replacedWith(bioRunes, repls, 0 .. bioRunes.len)
    withMentions = withUrls.replacef(unRegex, unReplace)
  profile.bio = withMentions.replacef(htRegex, htReplace)
proc getBanner(user: User): string =
  ## Picks the banner value for `user`, in order of preference:
  ##
  ## 1. the profile banner URL with `/1500x500` appended,
  ## 2. the profile link colour prefixed with `#`,
  ## 3. the fixed fallback `#161616`.
  if user.profileBannerUrl.len > 0:
    user.profileBannerUrl & "/1500x500"
  elif user.profileLinkColor.len > 0:
    '#' & user.profileLinkColor
  else:
    "#161616"
proc parseUser*(json: string): Profile =
  ## Decodes a user JSON document into a `Profile`.
  ##
  ## When the payload is an error response (see `handleErrors`):
  ##
  ## * `suspended` yields a profile with only `suspended` set;
  ## * `userNotFound` yields a default `Profile`;
  ## * any other code is printed and decoding continues regardless.
  ##
  ## Otherwise the fields are copied from the decoded `User`, the avatar URL
  ## loses its `_normal` marker and the entities are expanded into the bio
  ## and website.
  handleErrors:
    case error
    of suspended: return Profile(suspended: true)
    of userNotFound: return
    else: echo "[error - parseUser]: ", error

  let
    user = json.fromJson(User)
    avatar = getImageUrl(user.profileImageUrlHttps).replace("_normal", "")

  result = Profile(
    id: user.idStr,
    username: user.screenName,
    fullname: user.name,
    location: user.location,
    bio: user.description,
    following: user.friendsCount,
    followers: user.followersCount,
    tweets: user.statusesCount,
    likes: user.favouritesCount,
    media: user.mediaCount,
    verified: user.verified,
    protected: user.protected,
    joinDate: parseTwitterDate(user.createdAt),
    banner: getBanner(user),
    userPic: avatar
  )

  expandProfileEntities(result, user)

View File

@@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-only
import std/[sugar, strutils, times]
import ../types/common
import ../../utils as uutils
template parseTime(time: string; f: static string; flen: int): DateTime =
  ## Parses `time` as UTC using the format `f`.
  ##
  ## When `time` is not exactly `flen` characters long, the template instead
  ## executes `return` in the routine that invoked it, so that routine yields
  ## whatever its `result` holds at that point.
  if time.len == flen:
    parse(time, f, utc())
  else:
    return
proc parseIsoDate*(date: string): DateTime =
  ## Parses a 20-character timestamp of the form `yyyy-MM-ddTHH:mm:ssZ` as
  ## UTC. Input of any other length yields a default `DateTime`.
  parseTime(date, "yyyy-MM-dd\'T\'HH:mm:ss\'Z\'", 20)
proc parseTwitterDate*(date: string): DateTime =
  ## Parses a 30-character timestamp of the form
  ## `ddd MMM dd HH:mm:ss +0000 yyyy` as UTC. Input of any other length
  ## yields a default `DateTime`.
  ##
  ## The hour field uses `HH` (24-hour clock), matching `parseIsoDate`; `hh`
  ## is documented as the 12-hour pattern and the format carries no AM/PM
  ## marker.
  parseTime(date, "ddd MMM dd HH:mm:ss \'+0000\' yyyy", 30)
proc getImageUrl*(url: string): string =
  ## Returns a copy of `url` from which the `twimg` prefix and then the
  ## `https` prefix have been removed, each only when present.
  # NOTE(review): `twimg` and `https` are not defined in this block -
  # presumably constants from the imported utils module; confirm.
  result = url
  result.removePrefix(twimg)
  result.removePrefix(https)
template handleErrors*(body) =
  ## Runs `body` when the `json` string visible at the call site starts with
  ## `{"errors`.
  ##
  ## `json` is not a parameter: it is looked up where the template is
  ## instantiated. Inside `body` the injected symbol `error` holds the `code`
  ## of the first entry of the decoded `Errors` payload.
  if json.startsWith("{\"errors"):
    # `{.inject.}` makes `error` visible to the caller-supplied `body`.
    let error {.inject.} = json.fromJson(Errors).errors[0].code
    body