本文通过Python3+pyqt5实现了python Qt GUI 快速编程的19章的页面索引器应用程序例子。
/home/yrd/eric_workspace/chap19/walker_ans.py
#!/usr/bin/env python3 import codecs import html.entities import re import sys from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt) class Walker(QThread): finished = pyqtSignal(bool,int) indexed = pyqtSignal(str,int) COMMON_WORDS_THRESHOLD = 250 MIN_WORD_LEN = 3 MAX_WORD_LEN = 25 INVALID_FIRST_OR_LAST = frozenset("0123456789_") STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE) ENTITY_RE = re.compile(r"&(\w+") SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE) def __init__(self, index, lock, files, filenamesForWords, commonWords, parent=None): super(Walker, self).__init__(parent) self.index = index self.lock = lock self.files = files self.filenamesForWords = filenamesForWords self.commonWords = commonWords self.stopped = False self.mutex = QMutex() self.completed = False def stop(self): try: self.mutex.lock() self.stopped = True finally: self.mutex.unlock() def isStopped(self): try: self.mutex.lock() return self.stopped finally: self.mutex.unlock() def run(self): self.processFiles() self.stop() self.finished.emit(self.completed,self.index) def processFiles(self): def unichrFromEntity(match): text = match.group(match.lastindex) if text.isdigit(): return chr(int(text)) u = html.entities.name2codepoint.get(text) return chr(u) if u is not None else "" for fname in self.files: if self.isStopped(): return words = set() fh = None try: fh = codecs.open(fname, "r", "UTF8", "ignore") text = fh.read() except EnvironmentError as e: sys.stderr.write("Error: {0}\n".format(e)) continue finally: if fh is not None: fh.close() if self.isStopped(): return text = self.STRIPHTML_RE.sub("", text) text = self.ENTITY_RE.sub(unichrFromEntity, text) text = text.lower() for word in self.SPLIT_RE.split(text): if (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN and word[0] not in self.INVALID_FIRST_OR_LAST and word[-1] not in self.INVALID_FIRST_OR_LAST): try: self.lock.lockForRead() new = word not in self.commonWords finally: self.lock.unlock() if new: words.add(word) if self.isStopped(): return for word in words: try: self.lock.lockForWrite() files = self.filenamesForWords[word] if len(files) > self.COMMON_WORDS_THRESHOLD: del self.filenamesForWords[word] self.commonWords.add(word) else: files.add(str(fname)) finally: self.lock.unlock() self.indexed.emit(fname,self.index) self.completed = True /home/yrd/eric_workspace/chap19/pageindexer_ans.pyw #!/usr/bin/env python3 import collections import os import sys from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt) from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame, QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget, QPushButton, QVBoxLayout) import walker_ans as walker def isAlive(qobj): import sip try: sip.unwrapinstance(qobj) except RuntimeError: return False return True class Form(QDialog): def __init__(self, parent=None): super(Form, self).__init__(parent) self.mutex = QMutex() self.fileCount = 0 self.filenamesForWords = collections.defaultdict(set) self.commonWords = set() self.lock = QReadWriteLock() self.path = QDir.homePath() pathLabel = QLabel("Indexing path:") self.pathLabel = QLabel() self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken) self.pathButton = QPushButton("Set &Path...") self.pathButton.setAutoDefault(False) findLabel = QLabel("&Find word:") self.findEdit = QLineEdit() findLabel.setBuddy(self.findEdit) commonWordsLabel = QLabel("&Common words:") self.commonWordsListWidget = QListWidget() commonWordsLabel.setBuddy(self.commonWordsListWidget) filesLabel = QLabel("Files containing the &word:") self.filesListWidget = QListWidget() filesLabel.setBuddy(self.filesListWidget) filesIndexedLabel = QLabel("Files indexed") self.filesIndexedLCD = QLCDNumber() self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat) wordsIndexedLabel = QLabel("Words indexed") self.wordsIndexedLCD = QLCDNumber() self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat) commonWordsLCDLabel = QLabel("Common words") self.commonWordsLCD = QLCDNumber() self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat) self.statusLabel = QLabel("Click the 'Set Path' " "button to start indexing") self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken) topLayout = QHBoxLayout() topLayout.addWidget(pathLabel) topLayout.addWidget(self.pathLabel, 1) topLayout.addWidget(self.pathButton) topLayout.addWidget(findLabel) topLayout.addWidget(self.findEdit, 1) leftLayout = QVBoxLayout() leftLayout.addWidget(filesLabel) leftLayout.addWidget(self.filesListWidget) rightLayout = QVBoxLayout() rightLayout.addWidget(commonWordsLabel) rightLayout.addWidget(self.commonWordsListWidget) middleLayout = QHBoxLayout() middleLayout.addLayout(leftLayout, 1) middleLayout.addLayout(rightLayout) bottomLayout = QHBoxLayout() bottomLayout.addWidget(filesIndexedLabel) bottomLayout.addWidget(self.filesIndexedLCD) bottomLayout.addWidget(wordsIndexedLabel) bottomLayout.addWidget(self.wordsIndexedLCD) bottomLayout.addWidget(commonWordsLCDLabel) bottomLayout.addWidget(self.commonWordsLCD) bottomLayout.addStretch() layout = QVBoxLayout() layout.addLayout(topLayout) layout.addLayout(middleLayout) layout.addLayout(bottomLayout) layout.addWidget(self.statusLabel) self.setLayout(layout) self.walkers = [] self.completed = [] self.pathButton.clicked.connect(self.setPath) self.findEdit.returnPressed.connect(self.find) self.setWindowTitle("Page Indexer") def stopWalkers(self): for walker in self.walkers: if isAlive(walker) and walker.isRunning(): walker.stop() for walker in self.walkers: if isAlive(walker) and walker.isRunning(): walker.wait() self.walkers = [] self.completed = [] def setPath(self): self.stopWalkers() self.pathButton.setEnabled(False) path = QFileDialog.getExistingDirectory(self, "Choose a Path to Index", self.path) if not path: self.statusLabel.setText("Click the 'Set Path' " "button to start indexing") self.pathButton.setEnabled(True) return self.statusLabel.setText("Scanning directories...") QApplication.processEvents() # Needed for Windows self.path = QDir.toNativeSeparators(path) self.findEdit.setFocus() self.pathLabel.setText(self.path) self.statusLabel.clear() self.filesListWidget.clear() self.fileCount = 0 self.filenamesForWords = collections.defaultdict(set) self.commonWords = set() nofilesfound = True files = [] index = 0 for root, dirs, fnames in os.walk(str(self.path)): for name in [name for name in fnames if name.endswith((".htm", ".html"))]: files.append(os.path.join(root, name)) if len(files) == 1000: self.processFiles(index, files[:]) files = [] index += 1 nofilesfound = False if files: self.processFiles(index, files[:]) nofilesfound = False if nofilesfound: self.finishedIndexing() self.statusLabel.setText( "No HTML files found in the given path") def processFiles(self, index, files): thread = walker.Walker(index, self.lock, files, self.filenamesForWords, self.commonWords, self) thread.indexed[str,int].connect(self.indexed) thread.finished[bool,int].connect(self.finished) thread.finished.connect(thread.deleteLater) self.walkers.append(thread) self.completed.append(False) thread.start() thread.wait(300) # Needed for Windows def find(self): word = str(self.findEdit.text()) if not word: try: self.mutex.lock() self.statusLabel.setText("Enter a word to find in files") finally: self.mutex.unlock() return try: self.mutex.lock() self.statusLabel.clear() self.filesListWidget.clear() finally: self.mutex.unlock() word = word.lower() if " " in word: word = word.split()[0] try: self.lock.lockForRead() found = word in self.commonWords finally: self.lock.unlock() if found: try: self.mutex.lock() self.statusLabel.setText("Common words like '{0}' " "are not indexed".format(word)) finally: self.mutex.unlock() return try: self.lock.lockForRead() files = self.filenamesForWords.get(word, set()).copy() finally: self.lock.unlock() if not files: try: self.mutex.lock() self.statusLabel.setText("No indexed file contains " "the word '{0}'".format(word)) finally: self.mutex.unlock() return files = [QDir.toNativeSeparators(name) for name in sorted(files, key=str.lower)] try: self.mutex.lock() self.filesListWidget.addItems(files) self.statusLabel.setText( "{0} indexed files contain the word '{1}'".format( len(files), word)) finally: self.mutex.unlock() def indexed(self, fname, index): try: self.mutex.lock() self.statusLabel.setText(fname) self.fileCount += 1 count = self.fileCount finally: self.mutex.unlock() if count % 25 == 0: try: self.lock.lockForRead() indexedWordCount = len(self.filenamesForWords) commonWordCount = len(self.commonWords) finally: self.lock.unlock() try: self.mutex.lock() self.filesIndexedLCD.display(count) self.wordsIndexedLCD.display(indexedWordCount) self.commonWordsLCD.display(commonWordCount) finally: self.mutex.unlock() elif count % 101 == 0: try: self.lock.lockForRead() words = self.commonWords.copy() finally: self.lock.unlock() try: self.mutex.lock() self.commonWordsListWidget.clear() self.commonWordsListWidget.addItems(sorted(words)) finally: self.mutex.unlock() def finished(self, completed, index): done = False if self.walkers: self.completed[index] = True if all(self.completed): try: self.mutex.lock() self.statusLabel.setText("Finished") done = True finally: self.mutex.unlock() else: try: self.mutex.lock() self.statusLabel.setText("Finished") done = True finally: self.mutex.unlock() if done: self.finishedIndexing() def reject(self): if not all(self.completed): self.stopWalkers() self.finishedIndexing() else: self.accept() def closeEvent(self, event=None): self.stopWalkers() def finishedIndexing(self): self.filesIndexedLCD.display(self.fileCount) self.wordsIndexedLCD.display(len(self.filenamesForWords)) self.commonWordsLCD.display(len(self.commonWords)) self.pathButton.setEnabled(True) QApplication.processEvents() # Needed for Windows app = QApplication(sys.argv) form = Form() form.show() app.exec_()
运行结果:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
暂无评论...
稳了!魔兽国服回归的3条重磅消息!官宣时间再确认!
昨天有一位朋友在大神群里分享,自己亚服账号被封号之后居然弹出了国服的封号信息对话框。
这里面让他访问的是一个国服的战网网址,com.cn和后面的zh都非常明白地表明这就是国服战网。
而他在复制这个网址并且进行登录之后,确实是网易的网址,也就是我们熟悉的停服之后国服发布的暴雪游戏产品运营到期开放退款的说明。这是一件比较奇怪的事情,因为以前都没有出现这样的情况,现在突然提示跳转到国服战网的网址,是不是说明了简体中文客户端已经开始进行更新了呢?
更新日志
2024年11月26日
2024年11月26日
- 凤飞飞《我们的主题曲》飞跃制作[正版原抓WAV+CUE]
- 刘嘉亮《亮情歌2》[WAV+CUE][1G]
- 红馆40·谭咏麟《歌者恋歌浓情30年演唱会》3CD[低速原抓WAV+CUE][1.8G]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[320K/MP3][193.25MB]
- 【轻音乐】曼托凡尼乐团《精选辑》2CD.1998[FLAC+CUE整轨]
- 邝美云《心中有爱》1989年香港DMIJP版1MTO东芝首版[WAV+CUE]
- 群星《情叹-发烧女声DSD》天籁女声发烧碟[WAV+CUE]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[FLAC/分轨][748.03MB]
- 理想混蛋《Origin Sessions》[320K/MP3][37.47MB]
- 公馆青少年《我其实一点都不酷》[320K/MP3][78.78MB]
- 群星《情叹-发烧男声DSD》最值得珍藏的完美男声[WAV+CUE]
- 群星《国韵飘香·贵妃醉酒HQCD黑胶王》2CD[WAV]
- 卫兰《DAUGHTER》【低速原抓WAV+CUE】
- 公馆青少年《我其实一点都不酷》[FLAC/分轨][398.22MB]
- ZWEI《迟暮的花 (Explicit)》[320K/MP3][57.16MB]