bs4的非常奇怪的行为

问题描述

我正在尝试使用音频元数据解析html-doc,并且编写了该函数

from bs4 import *
import warnings
import re
import sys

warnings.filterwarnings('ignore')

def pairing(soup,author_class: tuple,track_class: tuple,subclass: tuple) -> dict:
    
    use_dict = {}

    for pair in zip(
            soup.find_all(author_class[0],{"class": author_class[1]}),soup.find_all(track_class[0],{"class": track_class[1]}),soup.find_all(subclass[0],{"class": subclass[1]})
    ):
        """
        pair[0] - musician(-s),pair[1] - track_name,pair[2] - subtitle for track(if any)
        """
        print(pair[0],"\n",pair[1],'\n',pair[2])

        track_author = pair[0].find('a').text
        # Clear row
        track_author = re.sub('[!@#$_]','',track_author)

        try:
            add_Meta = pair[2].find('span').text
        except AttributeError:
            print(sys.exc_info())
            add_Meta = ""

        track_name = pair[1].text + add_Meta
        # Clear
        track_name = re.sub('[!@#$_]',track_name)

        use_dict.update({track_author: track_name})
    return use_dict

问题就在附近,除了信任:try-author:当我请求track_author时,没问题,一切正常。请注意,并非所有曲目都带有字幕,因此有时此标签(跨度)为空(无)。但是我的代码认为在任何迭代中它都是空的。这是很奇怪和令人困惑的,因为上面带有track_author的代码看起来相同并且可以工作。我需要在子类(如果有)中返回文本,否则返回空行。
终端输出

<div class="audio_row__performers"><a>John Paesano</a></div> 
 <span class="audio_row__title_inner _audio_row__title_inner">The Final Lesson</span> 
 <span class="audio_row__title_inner_subtitle _audio_row__title_inner_subtitle"></span>
(<class 'AttributeError'>,AttributeError("'nonetype' object has no attribute 'text'"),<traceback object at 0x7fb32d283f80>)

<div class="audio_row__performers"><a>The Blue Notes</a></div> 
 <span class="audio_row__title_inner _audio_row__title_inner">Halo Theme</span> 
 <span class="audio_row__title_inner_subtitle _audio_row__title_inner_subtitle">Piano Rendition</span>
(<class 'AttributeError'>,<traceback object at 0x7fb32d267ec0>)

有人可以向我解释吗?

解决方法

所以,我决定采用另一种方式-正则表达式。

import UIKit
import PlaygroundSupport
import AVFoundation
import CoreMedia


class MyViewController : UIViewController {

    var mPlayer : AVPlayer?
    var mItem   : AVPlayerItem?
    var playerLayer        : AVPlayerLayer?
    var playerView = UIView()  
  


    override func loadView() {
    
        let view = UIView()
        self.view = view
        view.frame = CGRect(x: 0,y: 0,width: 1000,height: 1000)
        view.backgroundColor = .darkGray
    
        let playerFrame = CGRect(x: 1,y: 10,width: 500,height: 500)
        playerView.frame =  playerFrame
        view.addSubview(playerView)
        playVideo()
   
    }

   func playVideo(){
        let url0 =  FileManager.default.urls(for: .documentDirectory,in: .userDomainMask)[0].appendingPathComponent("IMG_0626.MOV")
    
    
        mItem                       = AVPlayerItem(url: url0)
        mPlayer                     = AVPlayer(playerItem: mItem!)
        playerLayer                 = AVPlayerLayer(player: mPlayer) 
    
        playerView.layer.addSublayer(playerLayer!)
        playerLayer?.frame          = playerView.bounds
        playerLayer?.videoGravity   = AVLayerVideoGravity.resizeAspect
        mPlayer?.play()
    }
}



// Present the view controller in the Live View window
PlaygroundPage.current.liveView = MyViewController()
PlaygroundPage.current.needsIndefiniteExecution = true