爬取网站音乐免费排行榜歌曲

听了老师的公开课之后,今晚自己凭记忆琢磨出来的。

以下是我自己另外添加的一些功能:

  • 获取免费排行榜中的专辑信息(id,title)
  • 计算每个专辑歌曲的页数
  • 手动输入专辑 id,下载指定专辑中的歌曲

源代码如下:

# -*- coding: utf-8 -*-


import requests
import json
import os
import time

# 获取音乐免费排行榜 的专辑名字,id
def get_albumid():
    url = 'https://www.ximalaya.com/revision/getRankList?code=yinyue'
    headers = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0"}
    r = requests.get(url, headers=headers)
    ret = r.content.decode()
    result = json.loads(ret)
    for i in result['data']['albums']:
        id = i['id']
        albumTitle = i['albumTitle']
        trackCount = i['trackCount']
        # 想输出左对齐,但是没有达到自己想要的效果
        print('{:<30}'.format(str(id) + '\t《' + albumTitle + '》\t------专辑有' + str(trackCount) + '条歌曲'))


# 指定专辑进行查询歌曲清单信息并下载
def album_get(albumid):
    album_url = 'https://www.ximalaya.com/revision/album?albumId=' + str(albumid)
    headers = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0"}
    r = requests.get(album_url, headers=headers)
    ret = r.content.decode()
    result = json.loads(ret)
    albumTitle = result['data']['mainInfo']['albumTitle'] # 专辑标题
    totalCount = result['data']['tracksInfo']['trackTotalCount']  # 专辑中歌曲总数量
    # pageNum = result['data']['tracksInfo']['pageNum'] # 当前页码
    pageSize = result['data']['tracksInfo']['pageSize'] # 每页的歌曲条目数
    # 计算该专辑有多少页
    v, a = divmod(totalCount, pageSize)
    if a :
        totalpageNum = v + 1
    else:
        totalpageNum = v
    # 构造翻页url
    for pn in range(1, totalpageNum+1):
        track_url = 'https://www.ximalaya.com/revision/play/album?albumId=' + str(albumid) + '&pageNum=' + str(pn) + '&sort=-1&pageSize=30'
        headers = {"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0"}
        track_r = requests.get(track_url, headers=headers)
        track_ret = track_r.content.decode()
        track_result = json.loads(track_ret)
        # 获取专辑下每一条歌曲信息
        for i in track_result['data']['tracksAudioPlay']:
            trackName = i['trackName']
            trackId = i['trackId']
            albumUrl = i['albumUrl']
            src = i['src']
            print(trackId, trackName)
            try:
                music = requests.get(src, headers=headers)
            except:
                print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
                time.sleep(3)
            if not os.path.exists('./ximalaya' + albumUrl[:-1] + '-' + albumTitle):
                # 如果没有目录ximalaya,也会自动创建
                os.makedirs('./ximalaya' + albumUrl[:-1] + '-' + albumTitle)
            with open('./ximalaya/' + albumUrl[:-1] + '-' + albumTitle +'/'+ str(trackId) + '-' + trackName + '.m4a', 'ab') as f:
                f.write(music.content)


if __name__ == '__main__':
    get_albumid()
    print("==============================================================================")
    print("以上是所列出来的专辑信息,请输入专辑ID号进行下载")
    albumid = input('请输入你喜欢的专辑ID:')
    album_get(albumid)