本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下
实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持的声道数为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。
# coding: utf-8
"""Speech recognition for large audio files through the Baidu REST API.

Pipeline implemented by this script:

1. ``ffmpeg_convert`` converts every pending upload to a mono 8000 Hz WAV.
2. ``ffmpeg_cut`` slices the converted file into 30-second sections and
   records each section in ``ocenter_section``.
3. ``go_recognize`` sends the sections to Baidu and appends the recognised
   text to the parent row in ``ocenter_recognition``.

The code is written to run on both Python 2 and Python 3.
"""
import base64
import json
import os
import subprocess
import time
import uuid

try:
    import urllib2  # Python 2 only; HTTP calls below go through requests
except ImportError:  # Python 3 has no urllib2
    urllib2 = None

import requests

from inc import db_config
from inc import rtysdb

# Length of one section in seconds (the article uses 30-second pieces).
SECTION_SECONDS = 30
# Sample rate sent to Baidu and requested from ffmpeg.
SAMPLE_RATE = 8000
# Name of the ffmpeg executable; resolved through PATH.
FFMPEG_BIN = "ffmpeg"
# Upper bound, in seconds, for every HTTP request so a stalled connection
# cannot hang the batch forever.
HTTP_TIMEOUT = 60

# NOTE(review): these credentials were hard-coded in the original script.
# They are kept as defaults for backward compatibility but can be overridden
# with the BAIDU_API_KEY / BAIDU_API_SECRET / BAIDU_CUID environment
# variables; real deployments should not ship secrets in source.
DEFAULT_API_KEY = "vossGHIgEETS6IMRxBDeahv8"
DEFAULT_API_SECRET = "3c1fe6a6312f41fa21fa2c394dad5510"
DEFAULT_CUID = "0-57-7B-9F-1F-A1"

# Maps the ``check_status`` argument of get_master_audio to the column that
# must still be 0 for a row to be selected.
_PENDING_COLUMN = {
    'cut_status': 'status',
    'finished_status': 'finished_status',
}


class BaiduRest(object):
    """Small client for Baidu's token, text-to-speech and recognition APIs."""

    def __init__(self, cu_id, api_key, api_secert):
        """Store the endpoints and immediately fetch an access token.

        Args:
            cu_id: Client identifier sent to Baidu as ``cuid``.
            api_key: Baidu application key.
            api_secert: Baidu application secret (original spelling kept so
                keyword callers keep working).
        """
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token"
        self.getvoice_url = "http://tsn.baidu.com/text2audio"
        self.upvoice_url = 'http://vop.baidu.com/server_api'
        self.cu_id = cu_id
        self.get_token(api_key, api_secert)

    def get_token(self, api_key, api_secert):
        """Request an OAuth access token and store it in ``self.token_str``.

        The endpoint URL carries no query string, so the credentials are
        passed as query parameters instead of being %-formatted into it.

        Returns:
            True on success.

        Raises:
            requests.RequestException: on network or HTTP errors.
            KeyError: if the response has no ``access_token``.
        """
        params = {
            'grant_type': 'client_credentials',
            'client_id': api_key,
            'client_secret': api_secert,
        }
        response = requests.get(self.token_url, params=params,
                                timeout=HTTP_TIMEOUT)
        response.raise_for_status()
        self.token_str = response.json()['access_token']
        return True

    # Speech synthesis
    def text2audio(self, text, filename):
        """Synthesise ``text`` and write the returned audio to ``filename``.

        Returns:
            True once the response body has been written.

        Raises:
            requests.RequestException: on network or HTTP errors.
        """
        params = {
            'tex': text,
            'lan': 'zh',
            'cuid': self.cu_id,
            'ctp': 1,
            'tok': self.token_str,
        }
        response = requests.get(self.getvoice_url, params=params,
                                timeout=HTTP_TIMEOUT)
        response.raise_for_status()
        with open(filename, 'wb') as voice_fp:
            voice_fp.write(response.content)
        return True

    # Speech recognition
    def audio2text(self, filename):
        """Upload a WAV file and return the recognised text.

        Args:
            filename: Path of a mono WAV file sampled at SAMPLE_RATE.

        Returns:
            The first recognition candidate, or False when Baidu does not
            report ``err_msg == 'success.'``.
        """
        with open(filename, 'rb') as wav_fp:
            voice_data = wav_fp.read()
        data = {
            'format': 'wav',
            'rate': SAMPLE_RATE,
            'channel': 1,
            'cuid': self.cu_id,
            'token': self.token_str,
            'len': len(voice_data),
            # b64encode never emits newlines; decode so the value is text
            # (and therefore JSON-serialisable) on Python 2 and 3 alike.
            'speech': base64.b64encode(voice_data).decode('ascii'),
        }
        response = requests.post(self.upvoice_url, json=data,
                                 headers={'Content-Type': 'application/json'},
                                 timeout=HTTP_TIMEOUT)
        data_result = response.json()
        if data_result.get('err_msg') == 'success.':
            return data_result['result'][0]
        return False


def test_voice(voice_file):
    """Recognise one audio file with a freshly authenticated client.

    Returns:
        The recognised text, or False when recognition failed.
    """
    api_key = os.environ.get('BAIDU_API_KEY', DEFAULT_API_KEY)
    api_secert = os.environ.get('BAIDU_API_SECRET', DEFAULT_API_SECRET)
    cu_id = os.environ.get('BAIDU_CUID', DEFAULT_CUID)
    bdr = BaiduRest(cu_id, api_key, api_secert)
    return bdr.audio2text(voice_file)


def get_master_audio(check_status='cut_status'):
    """Fetch master audio rows that still await a processing stage.

    Args:
        check_status: 'cut_status' selects rows with ``status=0``;
            'finished_status' selects rows with ``finished_status=0``.

    Returns:
        Whatever ``rtysdb.select_data`` returns when it is truthy, otherwise
        False (also for an unknown ``check_status``).
    """
    column = _PENDING_COLUMN.get(check_status)
    if column is None:
        return False
    sql = ("SELECT id,url, time_long,sharps FROM ocenter_recognition "
           "WHERE %s=0" % column)
    data = rtysdb.select_data(sql, 'more')
    return data if data else False


def _sql_text(value):
    """Escape ``value`` for use inside a single-quoted SQL string literal.

    NOTE(review): assumes a MySQL-style backend where backslash is an escape
    character; switch to parameterised queries if rtysdb offers them.
    """
    return value.replace('\\', '\\\\').replace("'", "''")


def go_recognize(master_id):
    """Recognise up to ten pending sections of one master audio record.

    Each recognised section has its text stored and appended to the parent
    record; sections whose file is missing are skipped.  After the batch the
    master record is marked ``finished_status=1``.

    Returns:
        False when there is no pending section, otherwise None.
    """
    section_path = db_config.SYS_PATH
    sql = ("SELECT id,rid,url,status FROM ocenter_section "
           "WHERE rid=%d AND status=0 order by id asc limit 10" % master_id)
    records = rtysdb.select_data(sql, 'more')
    if not records:
        return False

    for rec in records:
        section_id, parent_id, rel_url = rec[0], rec[1], rec[2]
        voice_file = section_path + '/' + rel_url
        if not os.path.exists(voice_file):
            continue

        # The original had a debugging "print result; exit(0)" here that
        # terminated the whole run after the first section.
        result = test_voice(voice_file)
        if result:
            rtysdb.do_exec_sql(
                "update ocenter_section set content='%s', status='%d' "
                "where id=%d" % (_sql_text(result), 1, section_id))
            parent = rtysdb.select_data(
                "SELECT id,content FROM ocenter_recognition WHERE id=%d"
                % parent_id)
            if parent:
                # Parent content may still be NULL before the first section.
                new_content = (parent[1] or '') + result
                rtysdb.do_exec_sql(
                    "update ocenter_recognition set content='%s' where id=%d"
                    % (_sql_text(new_content), parent_id))
        else:
            # Mark the section as handled even though nothing was recognised
            # (the original passed three values to two placeholders here).
            rtysdb.do_exec_sql(
                "update ocenter_section set status='%d' where id=%d"
                % (1, section_id))
        # Pause between requests, as the original did.
        time.sleep(5)

    # NOTE(review): the original appears to attach this to the loop's else
    # clause; with no break in the loop it always ran, so it is written as
    # plain trailing code.  It marks the master finished even if more than
    # ten sections were pending - confirm that is intended.
    rtysdb.do_exec_sql(
        "UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d"
        % master_id)


def _run_ffmpeg(args):
    """Run ffmpeg with ``args`` and wait for it to finish.

    An argument list is used instead of a shell string so paths containing
    spaces or shell metacharacters are passed through safely.

    Returns:
        The process exit code, or None when ffmpeg could not be started
        (callers check for the output file, mirroring the original
        best-effort behaviour).
    """
    try:
        return subprocess.call([FFMPEG_BIN] + list(args))
    except OSError:
        return None


def _format_offset(total_seconds):
    """Format a non-negative number of seconds as ``HH:MM:SS``."""
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


# Convert audio files Baidu cannot recognise directly
def ffmpeg_convert():
    """Convert every not-yet-cut upload to mono WAV and cut it into sections.

    For each row returned by ``get_master_audio('cut_status')`` the source
    file is converted; when the converted file exists it is cut with
    ``ffmpeg_cut`` and the row is updated with ``status=1`` and the
    converted file's relative name.
    """
    section_path = db_config.SYS_PATH
    pending = get_master_audio('cut_status')
    if not pending:
        return

    for audio in pending:
        source_path = section_path + '/' + audio[1]
        convert_name = ("Uploads/Convert/convert_" + str(uuid.uuid1())
                        + ".wav")
        target_path = section_path + "/" + convert_name
        _run_ffmpeg(["-i", source_path, "-ar", str(SAMPLE_RATE), "-ac", "1",
                     "-f", "wav", target_path])
        if os.path.exists(target_path):
            ffmpeg_cut(convert_name, audio[3], audio[0])
            sql = ("UPDATE ocenter_recognition SET status=1,"
                   "convert_name='%s' where id=%d"
                   % (convert_name, audio[0]))
            rtysdb.do_exec_sql(sql)


# Cut a large audio file into pieces
def ffmpeg_cut(convert_name, sharps, master_id):
    """Cut a converted WAV into ``sharps`` consecutive 30-second sections.

    Args:
        convert_name: Path of the converted WAV relative to SYS_PATH.
        sharps: Number of sections to produce; nothing happens when it is
            empty or not positive.
        master_id: Id of the parent ``ocenter_recognition`` row, stored as
            ``rid`` on every section row.
    """
    section_path = db_config.SYS_PATH
    if not sharps or sharps <= 0:
        return

    source_path = section_path + '/' + convert_name
    duration = _format_offset(SECTION_SECONDS)
    for index in range(int(sharps)):
        # Plain arithmetic replaces the localtime()/"minus 8 hours" trick,
        # which only worked in UTC+8 and for offsets under ten hours.
        start_time = _format_offset(index * SECTION_SECONDS)
        db_store_name = ("Uploads/Section/" + str(uuid.uuid1()) + '-'
                         + str(index + 1) + ".wav")
        section_name = section_path + "/" + db_store_name
        _run_ffmpeg(["-i", source_path, "-vn", "-acodec", "copy",
                     "-ss", start_time, "-t", duration, section_name])
        rtysdb.insert_one('ocenter_section', {
            'rid': master_id,
            'url': db_store_name,
            'create_time': int(time.time()),
            'status': 0,
        })


if __name__ == "__main__":
    ffmpeg_convert()
    audio = get_master_audio('finished_status')
    if audio:
        for ad in audio:
            go_recognize(ad[0])
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
免责声明:本站资源来自互联网收集,仅供用于学习和交流,请遵循相关法律法规,本站一切资源不代表本站立场,如有侵权、后门、不妥请联系本站删除!
暂无评论...
稳了!魔兽国服回归的3条重磅消息!官宣时间再确认!
昨天有一位朋友在大神群里分享,自己亚服账号被封号之后居然弹出了国服的封号信息对话框。
这里面让他访问的是一个国服的战网网址,com.cn和后面的zh都非常明白地表明这就是国服战网。
而他在复制这个网址并且进行登录之后,确实是网易的网址,也就是我们熟悉的停服之后国服发布的暴雪游戏产品运营到期开放退款的说明。这是一件比较奇怪的事情,因为以前都没有出现这样的情况,现在突然提示跳转到国服战网的网址,是不是说明了简体中文客户端已经开始进行更新了呢?
更新日志
2024年11月26日
2024年11月26日
- 凤飞飞《我们的主题曲》飞跃制作[正版原抓WAV+CUE]
- 刘嘉亮《亮情歌2》[WAV+CUE][1G]
- 红馆40·谭咏麟《歌者恋歌浓情30年演唱会》3CD[低速原抓WAV+CUE][1.8G]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[320K/MP3][193.25MB]
- 【轻音乐】曼托凡尼乐团《精选辑》2CD.1998[FLAC+CUE整轨]
- 邝美云《心中有爱》1989年香港DMIJP版1MTO东芝首版[WAV+CUE]
- 群星《情叹-发烧女声DSD》天籁女声发烧碟[WAV+CUE]
- 刘纬武《睡眠宝宝竖琴童谣 吉卜力工作室 白噪音安抚》[FLAC/分轨][748.03MB]
- 理想混蛋《Origin Sessions》[320K/MP3][37.47MB]
- 公馆青少年《我其实一点都不酷》[320K/MP3][78.78MB]
- 群星《情叹-发烧男声DSD》最值得珍藏的完美男声[WAV+CUE]
- 群星《国韵飘香·贵妃醉酒HQCD黑胶王》2CD[WAV]
- 卫兰《DAUGHTER》【低速原抓WAV+CUE】
- 公馆青少年《我其实一点都不酷》[FLAC/分轨][398.22MB]
- ZWEI《迟暮的花 (Explicit)》[320K/MP3][57.16MB]