文章目錄
- 全部高亮
- 單個高亮
- [mysql]高亮查詢
- 存html
- 存excel
全部高亮
# Sample data for the "highlight every occurrence" demo:
# ``w`` is the keyword, ``t`` is the sentence it is searched in.
w, t = '比賽', '比賽開始沒多久就結束了比賽,現在沒有比賽'
def replace_color(text, word):
    """Return ``text`` with every occurrence of ``word`` coloured red.

    Each hit is wrapped in the ANSI escape codes ``\\033[031m`` (red) and
    ``\\033[0m`` (reset). Occurrences are located from the end of the
    string towards the start and never overlap.

    Args:
        text: The string to search.
        word: The substring to highlight. An empty ``word`` leaves
            ``text`` unchanged.

    Returns:
        The highlighted copy of ``text``.
    """
    if not word:
        # An empty needle "matches" at every index and would otherwise
        # scatter escape codes between all characters.
        return text
    new_word = '\033[031m' + word + '\033[0m'  # red
    len_w = len(word)
    pieces = []  # collected back-to-front, reversed before joining
    end = len(text)
    start = text.rfind(word, 0, end)
    while start != -1:
        pieces.append(text[start + len_w:end])
        pieces.append(new_word)
        # Continue strictly left of this hit so matches cannot overlap.
        end = start
        start = text.rfind(word, 0, end)
    pieces.append(text[:end])
    return ''.join(reversed(pieces))
# Show the sentence first as-is, then with every keyword hit highlighted.
print(t, replace_color(t, w), sep='\n')
單個高亮
from
jieba
import
tokenize
text = '我用小米手機訂購了一袋小米'
entity = '小米'


def replace_color(word):
    """Wrap ``word`` in the ANSI escape codes ``\\033[033m`` ... ``\\033[0m``."""
    return '\033[033m' + word + '\033[0m'


def replace_word(sentence, word, head, tail):
    """Return ``sentence`` with the slice ``[head:tail]`` replaced by ``word``."""
    return sentence[:head] + word + sentence[tail:]


# Each item from ``tokenize`` is unpacked as (token, start, end). For every
# token equal to ``entity`` the sentence is printed once, with only that
# single occurrence coloured.
for word, head, tail in tokenize(text):
    if word != entity:
        continue
    word = replace_color(word)
    print(replace_word(text, word, head, tail))
mysql高亮查詢
def highlight(self, field, table, keyword, n=99):
    """Print the ``field`` values that contain ``keyword``, hits coloured red.

    Builds a ``SELECT ... WHERE INSTR(field, keyword) > 0`` query, passes it
    to ``self.fetchone(sql, n)``, and for every item returned prints its
    first element with each occurrence of ``keyword`` wrapped in the ANSI
    codes ``\\033[031m`` / ``\\033[0m``.

    Args:
        field: Column name; interpolated into the SQL text.
        table: Table name; interpolated into the SQL text.
        keyword: Substring to search for and highlight. When empty, the
            rows are printed without any highlighting.
        n: Forwarded unchanged to ``self.fetchone`` (presumably a row
            limit -- confirm against that helper).
    """
    # NOTE(review): field, table and keyword are pasted straight into the
    # SQL string. A keyword containing a quote breaks the statement, and
    # untrusted input allows SQL injection. Switch to a parameterised query
    # if ``self.fetchone`` supports one.
    sql = "SELECT %s FROM %s WHERE INSTR(%s,'%s')>0;" % (field, table, field, keyword)
    # Loop-invariant values are computed once, outside the row loop.
    highlight_word = '\033[031m' + keyword + '\033[0m'  # red
    len_w = len(keyword)
    for row in self.fetchone(sql, n):
        text = row[0]
        if len_w:  # an empty keyword would match at every index
            # Walk right-to-left so earlier indices stay valid after an
            # insertion further right. ``pos`` is kept distinct from the
            # row variable of the outer loop.
            for pos in range(len(text) - len_w, -1, -1):
                if text[pos:pos + len_w] == keyword:
                    text = text[:pos] + highlight_word + text[pos + len_w:]
        print(text)
存html
- py文件
def replace_html_tag(text, word):
    """Return ``text`` with every occurrence of ``word`` wrapped in a red font tag.

    Each hit becomes ``<font color="red">word</font>``. Occurrences are
    located from the end of the string towards the start and never overlap,
    so markup that was already inserted is never matched again.

    Args:
        text: The string to search. It is not HTML-escaped here.
        word: The substring to highlight. An empty ``word`` leaves
            ``text`` unchanged.

    Returns:
        The marked-up copy of ``text``.
    """
    if not word:
        # An empty needle "matches" at every index and would otherwise
        # insert a tag pair between all characters.
        return text
    new_word = '<font color="red">' + word + '</font>'
    len_w = len(word)
    pieces = []  # collected back-to-front, reversed before joining
    end = len(text)
    start = text.rfind(word, 0, end)
    while start != -1:
        pieces.append(text[start + len_w:end])
        pieces.append(new_word)
        end = start
        start = text.rfind(word, 0, end)
    pieces.append(text[:end])
    return ''.join(reversed(pieces))
def save_html(ls_of_ls, prefix):
    """Write ``ls_of_ls`` as an HTML table to the file ``prefix + '.html'``.

    Every inner sequence becomes one ``<tr>`` row and every item in it one
    ``<td>`` cell rendered with ``<font size="4">``. Cell content is written
    verbatim (not HTML-escaped), so markup already present in a cell, such as
    a ``<font color="red">`` highlight, is kept as markup.

    Args:
        ls_of_ls: Iterable of rows, each an iterable of cell values.
        prefix: Output path without the ``.html`` extension.
    """
    fname = prefix + '.html'
    with open(fname, 'w', encoding='utf-8') as f:
        # Declare UTF-8 so browsers decode non-ASCII cell text correctly.
        f.write('<html><head><meta charset="UTF-8"></head><body><table border="1">\n')
        for ls in ls_of_ls:
            f.write('<tr>')
            for i in ls:
                f.write('<td><font size="4">{}</font></td>'.format(i))
            f.write('</tr>\n')
        f.write('</table></body></html>\n')
# Demo input: four sample lines and the keyword to mark inside them.
texts = [
    '深扣菊花舔指笑',
    '菊花菊花一閃閃',
    '接天蓮葉無窮碧',
    '硬日菊花別樣紅',
]
word = '菊花'

# One table row per line: the keyword, then the line with the keyword marked up.
ls_of_ls = []
for text in texts:
    ls_of_ls.append([word, replace_html_tag(text, word)])

# The keyword doubles as the output file name prefix.
save_html(ls_of_ls, word)
- 生成的html代碼
<html>
<head>
<meta charset="UTF-8">
</head>
<body>
<table border="1">
<tr><td><font size="4">菊花</font></td><td><font size="4">深扣<font color="red">菊花</font>舔指笑</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4"><font color="red">菊花</font><font color="red">菊花</font>一閃閃</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">接天蓮葉無窮碧</font></td></tr>
<tr><td><font size="4">菊花</font></td><td><font size="4">硬日<font color="red">菊花</font>別樣紅</font></td></tr>
</table>
</body>
</html>
存excel
from
pandas
import
DataFrame
from
jieba
import
tokenize
from
xlwings
import
App
def replace_word(word):
    """Return ``word`` enclosed in the brackets ``【`` and ``】``."""
    return '【' + word + '】'


def replace_sentence(sentence, word, head, tail):
    """Return ``sentence`` with the slice ``[head:tail]`` replaced by ``word``."""
    prefix, suffix = sentence[:head], sentence[tail:]
    return prefix + word + suffix
def ner(text, entities=None):
    """Yield one record per entity token found in ``text``.

    ``text`` is split on ASCII commas into clauses. Each clause is passed to
    ``tokenize``, whose items are unpacked as (token, start, end). For every
    token that is a member of ``entities`` a three-item list is yielded:
    the full ``text``, the clause with that one token wrapped by
    ``replace_word``, and the token itself.

    Args:
        text: The string to scan.
        entities: Collection of tokens to report. Defaults to
            ``{'小米', '蘋果'}`` when omitted.

    Yields:
        ``[text, marked_clause, token]`` lists.
    """
    if entities is None:
        entities = {'小米', '蘋果'}
    for sentence in text.split(','):  # split into clauses
        for word, head, tail in tokenize(sentence):  # tokens with positions
            if word in entities:  # entity match
                yield [
                    text,
                    replace_sentence(sentence, replace_word(word), head, tail),
                    word,
                ]
def lss2excel(ls_of_ls, columns, fname):
    """Save the rows in ``ls_of_ls`` to the Excel file ``fname``.

    The rows are loaded into a ``DataFrame`` whose column labels are
    ``columns`` and written without the index column.
    """
    frame = DataFrame(ls_of_ls, columns=columns)
    frame.to_excel(fname, index=False)
def merge_cells(fname):
    """Format every sheet of the workbook ``fname`` and merge equal runs in column A.

    For each sheet the region around cell A1 gets a column width of 16 and a
    font size of 9. Consecutive rows (from row 2 down) holding the same value
    in column A are then merged into one wrapped cell. The workbook is saved
    in place.

    An error raised while formatting is printed in red and does not prevent
    the save. The Excel instance is always shut down, including when the
    workbook cannot be opened.

    Args:
        fname: Path of the workbook to modify.
    """
    # Start a hidden Excel instance without an empty workbook.
    app = App(add_book=False, visible=False)
    try:
        # Turn alerts off while cells are being merged (presumably to keep
        # Excel's merge warning from blocking -- confirm).
        app.display_alerts = False
        book = app.books.open(fname)
        try:
            for sheet in book.sheets:
                # Contiguous block of cells around A1.
                current_region = sheet.cells(1, 1).current_region
                current_region.column_width = 16
                current_region.api.Font.Size = 9
                # Row number of the region's last cell.
                last_row_index = current_region.last_cell.row
                _merge_equal_runs(sheet, last_row_index)
        except Exception as e:
            # Best effort: report the problem and still save what was done.
            print('\033[031m{}\033[0m'.format(e))
        # Turn alerts back on before saving.
        app.display_alerts = True
        book.save()
    finally:
        # Always close Excel so no hidden process is left running.
        app.quit()


def _merge_equal_runs(sheet, last_row_index):
    """Merge each run of equal consecutive column-A values in rows 2..last_row_index."""
    start = 2  # first row of the current run; row 1 is skipped
    for row in range(3, last_row_index + 2):
        # The row just past the region always closes the final run, so the
        # scan finishes in one pass even if trailing cells are empty.
        run_ended = (
            row > last_row_index
            or sheet.cells(start, 1).value != sheet.cells(row, 1).value
        )
        if run_ended:
            cells = sheet.range('A{}:A{}'.format(start, row - 1)).api
            cells.MergeCells = True  # merge
            cells.WrapText = True  # wrap text
            start = row
# Output workbook and its column headers.
fname = '手機.xlsx'
fields = ['text', 'phrase', 'word']

# Sample inputs for the entity scan.
texts = [
    '買小米機,送了袋小米和蘋果',
    '諾基亞',
    '買華為送蘋果',
]

# Flatten the records produced by ``ner`` for every sample into one row list.
ls_of_ls = [row for sample in texts for row in ner(sample)]

# Write the rows, then merge equal cells in the first column.
lss2excel(ls_of_ls, fields, fname)
merge_cells(fname)
更多文章、技術交流、商務合作、聯繫博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然後點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
