前端小白第一次用streamlit搭简易页面，记录一下。
一些tips
每次与页面进行交互，如点击按钮、上传文件等，streamlit就会重新运行整个页面的所有代码。如果在页面渲染前需要对上传文件做很复杂的操作，重新运行所有代码就会重复这……
前端小白第一次用streamlit搭简易页面，记录一下。
一些tips
每次与页面进行交互，如点击按钮、上传文件等，streamlit就会重新运行整个页面的所有代码。如果在页面渲染前需要对上传文件做很复杂的操作，重新运行所有代码就会重复这个过程，会导致页面加载内容较慢。因此可以把不会变化的内容存起来（例如存入 st.session_state），避免重新对文件进行处理。
streamlit的渲染顺序和代码定义的分布一致：例如先在代码里写了一级标题，就先渲染一级标题；先写占位符，就先渲染占位符。所以排序在前的部分渲染时，未渲染部分会是灰色。
一些命令
将页面分成两列
# Split the page into two equally wide columns.
left_col, right_col = st.columns([1, 1])
在占位符里动态渲染变化内容
# Reserve a slot first, then (re)fill it whenever the content changes.
left_content_placeholder = st.empty()
with left_content_placeholder.container():
    st.components.v1.html(
        st.session_state['current_content'], height=600, scrolling=True
    )
在页面加载时滚动到当前匹配的结果
# JavaScript appended to the rendered HTML: once the DOM is ready, scroll the
# element whose id is "current_match" into the middle of the viewport.
scroll_script = """
<script>
    document.addEventListener('DOMContentLoaded', function() {
        var element = document.getElementById('current_match');
        if (element) {
            element.scrollIntoView({ behavior: 'smooth', block: 'center' });
        }
    });
</script>
"""
代码示例
import streamlit as st
from docx import Document
import mammoth
import os
from bs4 import BeautifulSoup
import re
from PIL import Image
import base64
import markdown# 解析Word文档的函数保留原格式
def parse_word_document_for_table(file):
    """Render the top-level tables of a Word document as an HTML string.

    Each table is preceded by an ``<h3>`` heading taken from the paragraph
    that comes right before it in the document body ("无标题" when there is
    none).

    Args:
        file: A path or binary file-like object holding a ``.docx`` document.

    Returns:
        The generated HTML, or a string starting with ``"Error: "`` when the
        document cannot be processed (exceptions are not propagated).
    """
    # Local import keeps this block self-contained; cell text and titles come
    # from an uploaded document and must be escaped before being put in HTML.
    from html import escape

    try:
        # Mammoth converts the whole document to HTML, keeping its formatting.
        result = mammoth.convert_to_html(file)
        html_content = result.value

        # python-docx is used for the tables themselves.
        document = Document(file)
        html_parts = []
        table_index = 0  # position of the next table in document.tables
        prev_paragraph = ""

        # Walk the body in document order so that each table can be paired
        # with the paragraph immediately preceding it.
        for element in document.element.body:
            if element.tag.endswith('tbl'):  # table element
                table = document.tables[table_index]
                table_index += 1

                # The preceding paragraph serves as the table title.
                if prev_paragraph:
                    table_title = escape(prev_paragraph.strip())
                else:
                    table_title = "无标题"

                html_parts.append(f"<h3>{table_title}</h3>")
                html_parts.append(
                    "<table border='1' style='border: 1px solid black; "
                    "border-collapse: collapse; width: 100%;'>"
                )
                for row in table.rows:
                    html_parts.append("<tr>")
                    for cell in row.cells:
                        # Escape first, then turn in-cell line breaks into <br>.
                        cell_content = escape(cell.text).replace("\n", "<br>")
                        html_parts.append(
                            "<td style='padding: 5px; border: 1px solid black; "
                            "text-align: left; vertical-align: top;'>"
                            f"{cell_content}</td>"
                        )
                    html_parts.append("</tr>")
                html_parts.append("</table><br>")
            elif element.tag.endswith('p'):  # paragraph element
                # Remember the paragraph text as a candidate table title.
                prev_paragraph = element.text

        tables_html = "".join(html_parts)

        # NOTE(review): the operator on this line was lost in extraction. The
        # preceding commented-out line suggests the author replaced
        # "html_content += tables_html" with a plain assignment, so only the
        # tables are returned and the Mammoth output is discarded. Confirm.
        html_content = tables_html
        return html_content
    except Exception as e:
        return f"Error: {str(e)}"


def parse_word_document(file):
    """Convert a Word document to HTML with Mammoth.

    Args:
        file: A path or binary file-like object holding a ``.docx`` document.

    Returns:
        The HTML produced by Mammoth, or a string starting with ``"Error: "``
        when the conversion fails.
    """
    try:
        result = mammoth.convert_to_html(file)
        return result.value
    except Exception as e:
        return f"Error: {str(e)}"


def extract_markdown(file):
    """Read a UTF-8 Markdown file object and return it rendered as HTML.

    Args:
        file: A binary file-like object whose content is UTF-8 Markdown.

    Returns:
        The HTML string produced by ``markdown.markdown``.
    """
    content_text = file.read().decode('utf-8')
    return markdown.markdown(content_text)


# Tags whose text is searched and highlighted in the left-hand document view.
_SEARCHABLE_TAGS = ['p', 'span', 'div', 'td', 'h3']


def _mark_current_match(tags, current_index):
    """Colour the tag at *current_index* orange and every other tag yellow."""
    for idx, tag in enumerate(tags):
        if idx == current_index:
            # The scroll script looks this id up to bring the match into view.
            tag['id'] = 'current_match'
            tag['style'] = 'background-color: orange;'
        else:
            tag['id'] = ''
            tag['style'] = 'background-color: yellow;'


def main():
    """Build the two-column Streamlit page for checking a project report.

    The left column uploads a Word document, shows its tables and supports
    "find / find next" keyword search; the right column shows one of three
    Markdown result files chosen from a select box.

    NOTE(review): the original indentation was lost, so the nesting of the
    two ``st.components.v1.html`` calls is reconstructed: the plain render
    runs on every pass, the render with the scroll script only after a search
    click. Confirm against the original script.
    """
    DOCUMENT_PATHS = {
        '标题验证': './headings.md',
        '表格信息提取': './tables_information.md',
        '表格验证结果': './output.md',
    }

    # Wide layout; set_page_config is the first Streamlit call in this function.
    st.set_page_config(page_title="项目报告核验", layout="wide")
    logo_path = "./logo.jpg"
    st.image(logo_path, width=200)
    st.markdown(
        "<h1 style='text-align: center;'>项目报告核验</h1>",
        unsafe_allow_html=True,
    )

    # Custom CSS that centres the two column headers.
    st.markdown(
        """
    <style>
    .left-header {
        display: flex;
        justify-content: center;
        align-items: center;
        height: 100%;
        text-align: center;
    }
    .right-header {
        display: flex;
        justify-content: center;
        align-items: center;
        height: 100%;
        text-align: center;
    }
    </style>
    """,
        unsafe_allow_html=True,
    )

    # Two equally wide columns: uploaded document left, result files right.
    left_col, right_col = st.columns([1, 1])

    # Seed the session state once; the script is re-run on every interaction
    # and existing values must survive those re-runs.
    session_defaults = {
        'first_doc_uploaded': True,
        'left_doc_uploaded': False,
        'right_doc_content': '',
        'right_doc_error': '',
        'matches': [],
        'current_index': -1,
        'first_render': True,
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    with right_col:
        st.markdown(
            "<div class='right-header'><h2>内容展示区域</h2></div>",
            unsafe_allow_html=True,
        )
        tab_selection2 = st.selectbox(
            "选择展示内容:",
            ("标题验证", "表格信息提取", "表格验证结果"),
        )

    # Left column: upload, search controls and the document view.
    with left_col:
        st.markdown(
            "<div class='left-header'><h2>项目文件上传及原文展示</h2></div>",
            unsafe_allow_html=True,
        )
        uploaded_file = st.file_uploader("上传Word文档", type=["docx"])
        search_keyword = st.text_input("输入要搜索的关键词")
        search_button = st.button("查找/查找下一个")
        left_content_placeholder = st.empty()

        # No file (or file removed): forget any previously parsed content.
        if not uploaded_file:
            st.session_state['current_content'] = None
            st.session_state['left_doc_uploaded'] = False

        # Parse only when nothing is cached yet, so that re-runs triggered by
        # other widgets do not convert the document again.
        if not st.session_state['current_content'] and uploaded_file:
            try:
                content = parse_word_document_for_table(uploaded_file)
                st.session_state['left_doc_uploaded'] = True
                st.session_state['current_content'] = content
            except Exception as e:
                st.error(str(e))

        with left_content_placeholder.container():
            st.components.v1.html(
                st.session_state['current_content'], height=600, scrolling=True
            )

        # The search button only has an effect once a document is loaded.
        if st.session_state['left_doc_uploaded'] and search_button:
            last_keyword = st.session_state.get('last_search_keyword', '')

            if search_keyword != last_keyword:
                # Keyword changed: start a fresh search on the pristine HTML.
                st.session_state['last_search_keyword'] = search_keyword
                st.session_state['matches'] = []
                st.session_state['current_index'] = -1
                content = parse_word_document_for_table(uploaded_file)
                st.session_state['current_content'] = content

                if search_keyword:
                    soup_new_search = BeautifulSoup(content, 'html.parser')
                    paragraphs = soup_new_search.find_all(_SEARCHABLE_TAGS)
                    pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)

                    matches = []
                    for paragraph in paragraphs:
                        if not pattern.search(paragraph.get_text()):
                            continue
                        # Wrap every occurrence in a yellow <mark>.
                        highlighted_text = pattern.sub(
                            lambda match: (
                                "<mark style='background-color: yellow;'>"
                                f"{match.group(0)}</mark>"
                            ),
                            paragraph.decode_contents(),
                        )
                        paragraph.clear()
                        paragraph.append(
                            BeautifulSoup(highlighted_text, 'html.parser')
                        )
                        matches.append(paragraph)

                    st.session_state['matches'] = matches
                    st.session_state['current_index'] = 0  # first match

                    # Only touch `matches` here, where it is guaranteed to be
                    # bound (an empty keyword never reaches this point).
                    if matches:
                        _mark_current_match(
                            matches, st.session_state['current_index']
                        )

                    # Keep the highlighted document for the following re-runs.
                    st.session_state['current_content'] = str(soup_new_search)
            else:
                # Same keyword: advance to the next match, wrapping around.
                if st.session_state['matches']:
                    st.session_state['current_index'] = (
                        st.session_state['current_index'] + 1
                    ) % len(st.session_state['matches'])

                    soup_repeat = BeautifulSoup(
                        st.session_state['current_content'], 'html.parser'
                    )
                    pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)
                    matching_tags = [
                        tag
                        for tag in soup_repeat.find_all(_SEARCHABLE_TAGS)
                        if pattern.search(tag.get_text())
                    ]
                    _mark_current_match(
                        matching_tags, st.session_state['current_index']
                    )
                    st.session_state['current_content'] = str(soup_repeat)

            # Scroll to the current match once the embedded page has loaded.
            scroll_script = """
            <script>
                document.addEventListener('DOMContentLoaded', function() {
                    var element = document.getElementById('current_match');
                    if (element) {
                        element.scrollIntoView({ behavior: 'smooth', block: 'center' });
                    }
                });
            </script>
            """
            with left_content_placeholder.container():
                st.components.v1.html(
                    st.session_state['current_content'] + scroll_script,
                    height=600,
                    scrolling=True,
                )

    with right_col:
        right_content_placeholder = st.empty()

        if st.session_state['left_doc_uploaded']:
            selected_document = DOCUMENT_PATHS.get(tab_selection2)
        else:
            # Nothing uploaded: clear whatever the right column showed before.
            selected_document = None
            st.session_state['right_doc_content'] = ''
            right_content_placeholder.empty()

        if 'right_selected_doc' not in st.session_state:
            st.session_state['right_selected_doc'] = None

        if selected_document:
            try:
                with open(selected_document, 'rb') as file:
                    content = extract_markdown(file)
                if content.startswith('Error'):
                    st.session_state['right_doc_error'] = content
                else:
                    st.session_state['right_doc_content'] = content
                    st.session_state['right_selected_doc'] = selected_document
            except Exception as e:
                st.error(str(e))

        if st.session_state['right_doc_content']:
            with right_content_placeholder.container():
                st.markdown(
                    "<div style='height: 750px; overflow-y: scroll; "
                    "padding: 10px; border: 1px solid #ccc; "
                    f"border-radius: 5px;'>{st.session_state['right_doc_content']}"
                    "</div>",
                    unsafe_allow_html=True,
                )


if __name__ == '__main__':
    main()