Scripting-Scripts/Package-Helper/parser.ts at main · BlackCCCat/Scripting-Scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import type { PickupInfo } from "./types"

const BRACKET_RE = /【([^】\d]{2,10})】/
const LOCATION_RE = /(?:到达|至|放|在|取件地[:：]|地址[:：])\s*([^，,。!！\n\r\]】]{2,30}?(?:店|驿站|超市|服务部|前台|门卫|代收点|便利店|服务站|仓|柜|厅|室|中心|报亭|花园|小区|楼|园|广场))/i
const GENERIC_RE = /(菜鸟|蜂巢|丰巢|兔喜|兔喜生活|极兔|顺丰|京东|韵达|中通|圆通|申通|邮政|EMS|妈妈驿站|驿站|日日顺|德邦)/i
const CODE_RE = /(?:取件码|取货码|验证码|提货码|取件|取货|凭)[^\d]{0,8}((\s*(?:\d+-){0,2}\d{3,8}[\s,，\.]*)+)/gi
const EXTRA_INFO_PATTERNS = [
  /([A-Za-z0-9一二三四五六七八九十百]+号柜)/i,
  /([A-Za-z0-9]+柜)/i,
  /(货架[A-Za-z0-9一二三四五六七八九十百-]+)/i,
  /([A-Za-z0-9一二三四五六七八九十百]+号货架)/i,
  /([A-Za-z0-9一二三四五六七八九十百]+号架)/i,
  /([A-Za-z0-9一二三四五六七八九十百]+层)/i,
]

function escapeRegExp(text: string) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
}

function normalizeSnippetCandidate(text: string, locationName: string | null, code: string, courierName?: string | null) {
  let normalized = String(text || "")
    .replace(/https?:\/\/\S+/gi, " ")
    .replace(/\b[a-z]\.[^\s]+/gi, " ")
    .replace(/【[^】]+】/g, " ")

  if (locationName) {
    normalized = normalized.replace(new RegExp(escapeRegExp(locationName), "g"), " ")
  }

  if (courierName && courierName !== locationName) {
    normalized = normalized.replace(new RegExp(escapeRegExp(courierName), "g"), " ")
  }

  normalized = normalized
    .replace(new RegExp(escapeRegExp(code), "g"), " ")
    .replace(/(?:凭|取件码|取货码|验证码|提货码|快递柜|快递员及?|快递员|至|到|前往|领取|取件|取货|提货|即可|规则|存放|放入|点击|详情)/g, " ")
    .replace(/(?:畅存规则|查看详情|详情请见|更多信息).*$/g, " ")
    .replace(/[，,。!！?？:：;；（）()\[\]【】]/g, " ")
    .replace(/\s+/g, " ")
    .trim()

  return normalized
}

export function buildPickupSnippetText(
  sourceText: string,
  locationName: string | null,
  code: string,
  courierName?: string | null,
) {
  const normalized = normalizeSnippetCandidate(sourceText, locationName, code, courierName)

  for (const pattern of EXTRA_INFO_PATTERNS) {
    const match = normalized.match(pattern)
    if (match?.[1]) {
      return match[1].trim()
    }
  }

  if (normalized) {
    return normalized.slice(0, 24)
  }

  return "查看包裹详情"
}

function parseMessageDate(text: string): string | null {
  const patterns = [
    /(20\d{2}[-/.年]\d{1,2}[-/.月]\d{1,2}(?:日)?\s+\d{1,2}:\d{2}(?::\d{2})?)/,
    /(\d{1,2}[-/.月]\d{1,2}(?:日)?\s+\d{1,2}:\d{2}(?::\d{2})?)/,
    /(今天\s*\d{1,2}:\d{2})/,
    /(昨天\s*\d{1,2}:\d{2})/,
  ]

  for (const re of patterns) {
    const m = text.match(re)
    if (!m || !m[1]) continue
    const raw = m[1].trim()
    const now = new Date()

    if (raw.startsWith("今天")) {
      const hm = raw.replace("今天", "").trim()
      const [h, min] = hm.split(":").map(Number)
      const d = new Date(now)
      d.setHours(h || 0, min || 0, 0, 0)
      return d.toISOString()
    }

    if (raw.startsWith("昨天")) {
      const hm = raw.replace("昨天", "").trim()
      const [h, min] = hm.split(":").map(Number)
      const d = new Date(now.getTime() - 24 * 3600 * 1000)
      d.setHours(h || 0, min || 0, 0, 0)
      return d.toISOString()
    }

    let normalized = raw
      .replace(/年/g, "-")
      .replace(/月/g, "-")
      .replace(/日/g, "")
      .replace(/\//g, "-")
      .replace(/\./g, "-")
      .trim()

    if (!/^20\d{2}-/.test(normalized)) {
      normalized = `${now.getFullYear()}-${normalized}`
    }

    const d = new Date(normalized)
    if (!isNaN(d.getTime())) return d.toISOString()
  }

  return null
}

export function extractPickupFromText(text: string): PickupInfo[] {
  if (!text) return []

  const results: PickupInfo[] = []
  const matcher = new RegExp(CODE_RE, "gi")
  let match: RegExpExecArray | null
  const detectedDate = parseMessageDate(text)

  while ((match = matcher.exec(text)) !== null) {
    const codeListString = match[1]
    if (!codeListString || !codeListString.trim()) continue

    let start = Math.max(0, match.index - 100)
    const lastBracket = text.lastIndexOf("【", match.index)
    const lastNewLine = text.lastIndexOf("\n", match.index)
    if (lastBracket > start) start = lastBracket
    if (lastNewLine > start) start = lastNewLine

    let end = Math.min(text.length, match.index + match[0].length + 100)
    const nextBracket = text.indexOf("【", match.index + match[0].length)
    const nextNewLine = text.indexOf("\n", match.index + match[0].length)
    if (nextBracket !== -1 && nextBracket < end) end = nextBracket
    if (nextNewLine !== -1 && nextNewLine < end) end = nextNewLine

    const context = text.slice(start, end)
    const bracketMatch = context.match(BRACKET_RE)
    const bracketName = bracketMatch ? bracketMatch[1] : null

    const locMatch = context.match(LOCATION_RE)
    let locationName = locMatch ? locMatch[1] : null
    if (locationName) locationName = locationName.replace(/^(在|位于|地址|:|：)/, "")

    const genericName = (context.match(GENERIC_RE) || [null])[0]
    const finalCourier = locationName || bracketName || genericName || null

    const snippetStart = lastBracket !== -1 ? lastBracket : Math.max(0, match.index - 40)
    let snippetEnd = text.length
    const nextCourierBracket = text.indexOf("【", match.index + match[0].length)
    if (nextCourierBracket !== -1) snippetEnd = Math.min(snippetEnd, nextCourierBracket)
    const isolatedSnippet = text.slice(snippetStart, snippetEnd).trim()

    const singleCodeRegex = /(\d+-){0,2}\d{3,8}/g
    let singleCodeMatch: RegExpExecArray | null

    while ((singleCodeMatch = singleCodeRegex.exec(codeListString)) !== null) {
      const code = singleCodeMatch[0]
      if (!code) continue
      results.push({
        courier: finalCourier,
        code,
        snippet: buildPickupSnippetText(isolatedSnippet, finalCourier, code, bracketName || genericName),
        date: detectedDate,
      })
    }
  }

  return results
}

export function splitMessages(data: string): string[] {
  const normalized = data.replace(/(\r\n|\n|\r)/g, "\n").trim()
  if (!normalized) return []

  if (normalized.includes("---SMS-DIVIDER---")) {
    return normalized.split(/---SMS-DIVIDER---/g).map((s) => s.trim()).filter(Boolean)
  }

  const byBracket = normalized.split(/(?=\n?【[^】]{2,20}】)/g).map((s) => s.trim()).filter(Boolean)
  if (byBracket.length > 1) return byBracket

  const byParagraph = normalized.split(/\n{2,}/g).map((s) => s.trim()).filter(Boolean)
  if (byParagraph.length > 1) return byParagraph

  return [normalized]
}