list、tuple

引用、浅拷贝

def shallow_copy():
    """Demonstrate which changes are shared between a list and its shallow copy.

    Python objects are either mutable (list, dict, set) or immutable
    (int, float, str, tuple). ``list.copy()`` builds a new outer list whose
    slots reference the very same element objects as the original list.

    * Rebinding a slot (``lst[i] = new_obj``) only affects the list it is
      done on; the other list keeps referencing the old object. This is the
      only way to "change" an immutable element.
    * Mutating a shared mutable element in place (``lst[i][j] = x``) is
      visible through both lists, at any nesting depth.

    Prints ``here`` after each of the four scenarios and returns ``None``.
    """
    # Scenario 1: rebinding slots of the original never reaches the copy.
    original = [0, 1, 2, [3, 4, 5], (6, 7, 8)]
    duplicate = original.copy()  # shallow copy
    original[0] = 9  # duplicate[0] is still 0
    # Not an in-place mutation: the slot now references a brand-new list,
    # so duplicate[3] is still [3, 4, 5].
    original[3] = [10, 11, 12]
    original[4] = (13, 14, 15)  # duplicate[4] is still (6, 7, 8)

    # No effect on duplicate: original[3] was replaced above, so the two
    # lists no longer reference the same inner list.
    original[3][1] = 123
    print('here')

    # Scenario 2: in-place mutation of a shared inner list.
    one_level = [100, 200, [300, 400, 500]]
    one_level_copy = one_level.copy()  # shallow copy
    one_level[2][1] = 0  # one_level_copy[2] is now [300, 0, 500] as well
    print('here')

    # Scenario 3: the same holds for deeper nesting.
    two_levels = [10, [20, 30, [40, 50, 60]]]
    two_levels_copy = two_levels.copy()  # shallow copy
    two_levels[1][2][2] = 0  # two_levels_copy[1][2] is now [40, 50, 0] as well
    print('here')

    # Scenario 4: a shared tuple that contains a list.
    with_tuple = [1000, 2000, (1, 2, [3, 4])]
    with_tuple_copy = with_tuple.copy()  # shallow copy
    # Both lists still reference the same tuple. The tuple itself was not
    # replaced; the list inside it was mutated, so with_tuple_copy[2] is now
    # (1, 2, [3000, 4]) as well.
    with_tuple[2][2][0] = 3000
    print('here')

    #########################################################
    # Mental model: right after a shallow copy there are two list objects
    # but only one set of element objects.
    # Making one list reference a new element does not affect the other list.
    # Changing an existing (shared) element in place shows up in both lists.
    #########################################################

def ls_reference():
    """Contrast binding a second name to a list with shallow-copying it."""
    # Aliasing: assignment creates a new reference to the SAME list object;
    # only one list exists in memory, so a change made through either name
    # is visible through the other.
    shared = [1, 2, 3, 4, 5]
    alias = shared
    shared[0] = 233  # alias: [233, 2, 3, 4, 5]

    # Copying: .copy() creates a new, separate outer list. (For mutable
    # elements only the reference is copied, i.e. the copy is shallow.)
    # Replacing an immutable element in one list leaves the other untouched.
    source = [6, 7, 8, 9, 10]
    snapshot = source.copy()
    source[0] = 233  # snapshot: [6, 7, 8, 9, 10]

def tuple_test():
    """Show that tuple immutability is shallow.

    A tuple's own slots cannot be rebound, but a mutable object stored in a
    slot can still be modified in place. Prints ``here`` and returns ``None``.
    """
    record = (1, 2, [3, 4])
    # record[0] = 'a'    # TypeError: tuple slots cannot be reassigned
    inner = record[2]
    inner[0] = 'a'  # allowed: mutates the list held by the tuple
    print('here')

常用操作

import random

def list_test():
    """Cheat sheet of common list operations.

    The examples are kept as comments; uncomment one to try it. The function
    itself only builds ``ori_list`` and prints ``here``.
    """
    ori_list = list(range(10))  # [0, 1, ..., 9]
    # .append(object)
    # .extend((10, 11, 12))   # append every element of an iterable (list, tuple, ...) one by one
    # .clear()   # remove all elements
    # .copy()    # shallow copy
    # copy.deepcopy(ori_list)   # deep copy; a function of the `copy` module, lists have no .deepcopy() method
    # .count(value)  # number of occurrences of value

    # .index(value, start, stop)     # position of the first occurrence of value within [start, stop)
    # tmp_1 = [2, 3, [1, 2, 3]]
    # index = tmp_1.index(1)   # ValueError: 1 is not in list (nested lists are not searched)

    # .insert(index, object)     # insert object before position index
    # .pop(index)    # remove and return the element at index; defaults to the last element
    # .remove(value) # remove the first element equal to value; raises ValueError if absent
    # .reverse()     # reverse in place
    # .sort(reverse=False)  # sort in place; reverse=True sorts descending

    # De-duplicate (element order is not guaranteed to be preserved)
    # tmp_list = [1, 2, 3, 3, 4, 4, 5, 6]
    # print(list(set(tmp_list)))

    # len(list)
    # max(list)
    # min(list)
    # sum(list)

    # Joining with str.join and splitting with str.split
    # tmp_list = ["111", "222", "333", "444"]
    # res_1 = ";".join(tmp_list)      # returns the joined str
    # res_2_list = res_1.split(";")
    # res_3_list = res_1.split(";", 2)    # optional second argument: maximum number of splits

    # range([start], end, [step])   # lazy sequence of integers in [start, end); wrap in list() to get a list
    #   start: optional, first number, default 0
    #   end: required, exclusive upper bound
    #   step: optional, stride, default 1
    # res_list = list(range(1, 7, 1))

    # "+" concatenates and creates a new list
    # a = [1, 2, 3]
    # b = [4, 5, 6]
    # c = a + b		# [1, 2, 3, 4, 5, 6]

    # "*" repeats
    # a = [1, 2, 3] * 3		# [1, 2, 3, 1, 2, 3, 1, 2, 3]

    # random.shuffle(ori_list)    # shuffle the list in place

    print('here')

dict

def dict_test():
    """Cheat sheet of common dict operations.

    The examples are kept as comments; uncomment one to try it. The function
    itself only builds ``ori_dict`` and prints ``here``.
    """
    ori_dict = dict(k_1='v_1', k_2='v_2', k_3='v_3')

    # .clear()  # remove every entry

    # len(ori_dict) # number of entries, i.e. number of keys

    # Assignment updates the value if the key exists, otherwise adds the key
    # ori_dict['k_3'] = 'v_3_new'
    # ori_dict['k_4'] = 'v_4'

    # Remove a key and return its value
    # res = ori_dict.pop('k_2')

    # Remove one entry and return it as a (key, value) tuple.
    # Since Python 3.7 this is the most recently inserted entry (LIFO), not a random one.
    # res = ori_dict.popitem()

    # ori_dict['k_8']   # lookup; raises KeyError if the key is missing
    # ori_dict.get('k_8', 'not exist')  # lookup; returns the optional second argument (default None) if missing

    # Views used for iteration
    # dict_keys = ori_dict.keys()			# type: 'dict_keys'
    # dict_values = ori_dict.values()		# type: 'dict_values'
    # dict_items = ori_dict.items()			# type: 'dict_items'

    # What the views support
    # for key in dict_keys: ...
    # if 'k_a' in dict_keys: ...
    # keys_list = list(dict_keys)

    # if ('a', 1) in dict_items: ...
    # items_list = list(dict_items)

    # Merge every entry of dict_2 into ori_dict
    # dict_2 = {'k_100': 'v_100', 'k_101': 'v_101'}
    # ori_dict.update(dict_2)

    print('here')

set

def set_test():
    """Cheat sheet of common set operations.

    Builds ``ori_set`` as {1, 2, 3, 4, 5, 6}, keeps further examples as
    comments, prints ``here`` and returns ``None``.
    """
    ori_set = set()

    # A brace literal needs at least one element to be a set; `{}` is an empty dict
    # tmp_set = {"123", "456"}

    # Add elements one at a time
    for value in (1, 2, 3):
        ori_set.add(value)

    # Add several elements at once; duplicates are dropped automatically
    ori_set.update([4, 4, 5, 6, 6])

    # ori_set.remove(6)   # remove an element; raises KeyError if absent
    # ori_set.discard(8)  # remove an element; does nothing if absent
    # res = ori_set.pop()   # remove and return an arbitrary element; raises KeyError on an empty set.
    #                       # (list.pop() removes the last element; a set is unordered, so which one is unspecified)

    # .clear()  # remove every element

    # Subset / superset checks, return True when the relation holds
    # tmp_set = set()
    # tmp_set.update([2, 3, 4])
    # res = tmp_set.issubset(ori_set)   # same as `res = tmp_set <= ori_set`
    # res = ori_set.issuperset(tmp_set)   # same as `res = ori_set >= tmp_set`

    # Union, returns a new set
    # tmp_set = set()
    # tmp_set.update([5, 6, 7, 8, 9])
    # res_set = ori_set.union(tmp_set)  # same as `res_set = ori_set | tmp_set`

    # Intersection, returns a new set
    # tmp_set = set()
    # tmp_set.update([5, 6, 7, 8, 9])
    # res_set = ori_set.intersection(tmp_set)   # same as `res_set = ori_set & tmp_set`

    # Sets can also be compared with "==" and "!="

    print('here')

defaultdict

from collections import defaultdict

def default_dict_test():
    """Show how ``defaultdict`` supplies a value for a missing key.

    Prints ``[]`` (missing key 0), ``[1, 2, 3]`` (existing key 1) and
    ``here``; returns ``None``.
    """
    # defaultdict(factory_function)
    #   factory_function is a zero-argument callable such as list, set, str
    #   or int. Looking up a missing key calls it, stores the result under
    #   that key and returns it:
    #   list -> [], str -> "", set -> set(), int -> 0
    lookup = defaultdict(list)
    lookup[1] = [1, 2, 3]
    for key in (0, 1):  # key 0 is missing and gets the default []
        print(lookup[key])

    print('here')

Series

from pandas import Series
import numpy as np


def series_test():
    """Cheat sheet of basic pandas ``Series`` usage.

    A Series is a one-dimensional array of values paired with an index.
    It is ordered, and index labels may or may not be unique.

    Most examples are kept as comments; uncomment one to try it. The function
    builds a few Series, prints ``here`` and returns ``None``.

    Missing values are written as ``np.nan``; the legacy ``np.NAN`` alias is
    avoided because it is not available in NumPy 2.x.
    """

    # Without an explicit index, an integer index 0..N-1 is created
    ori_ser = Series([10, 20, 30, 40, 50, 60])
    # print(ori_ser)
    # print(ori_ser.index)
    # print(ori_ser.values)

    # Explicit index labels
    ori_ser_2 = Series([4, -1, 8, 5], index=['a', 'b', 'c', 'd'])
    # print(ori_ser_2)

    # Look up values by label
    # print(ori_ser_2['c'])
    # print(ori_ser_2[['c', 'd']])      # same as ori_ser_2.loc[['c', 'd']]
    # Slice
    # print(ori_ser[0:2])
    # Replace a value by label
    # ori_ser_2['c'] = 10
    # print(ori_ser_2)

    # Boolean filtering
    # print(ori_ser_2[ori_ser_2.values > 0])
    # print(ori_ser[ori_ser.index < 3])

    # Build a Series from a dict (keys become the index)
    ori_ser_3 = Series({'a': 0, 'b': 1, 'c': 2, 'd': np.nan})
    # print(ori_ser_3)

    # Detect missing values
    # print(ori_ser_3.isnull())		# Series with the same index as ori_ser_3 and True/False values
    # print(ori_ser_3.notnull())	# same, inverted

    # Arithmetic aligns the operands on their index labels
    obj_a = Series([1, 2, np.nan, 4], index=['a', 'b', 'c', 'd'])
    obj_b = Series([1, np.nan, 3, 4], index=['a', 'b', 'd', 'e'])
    # print(obj_a + obj_b)

    # Relabel the index in place
    ori_ser_3.index = ['aa', 'bb', 'cc', 'dd']
    # print(ori_ser_3)

    # Basic statistics
    # print(ori_ser.mean())
    # print(ori_ser.sum())
    # print(ori_ser.median())     # median
    # print(ori_ser.value_counts())   # occurrences of each value

    # Drop by label; returns a new Series and leaves ori_ser_2 unchanged
    new_ori_ser_2 = ori_ser_2.drop('a')

    print('here')

range

def range_test():
    """Placeholder demo for ``range``; the loop example is commented out, so only ``here`` is printed."""
    # Yields the integers in [0, 10) one by one, with step 1
    # for i in range(0, 10, 1):
    #     print(i)

    print('here')

np.arange

def np_arange_test():
    """Print ``np.arange(0, 5, 0.5)`` followed by ``here``; returns ``None``.

    ``np.arange()`` compared with ``range()``:
      1. Both produce an iterable, half-open interval [start, stop).
      2. ``np.arange()`` returns an ndarray; ``range()`` returns a range object.
      3. ``np.arange()`` accepts a fractional step; ``range()`` accepts integers only.
    """
    half_steps = np.arange(0, 5, 0.5)
    print(half_steps)

    print('here')

np.random

def np_random_test():
    """Cheat sheet for ``numpy.random``.

    Draws 5 distinct integers from [0, 10) with the global NumPy RNG, prints
    them, then prints ``here``. Other examples are kept as comments.

    Author's note: compared with the standard-library ``random`` module,
    ``numpy.random`` is more powerful and faster, so it is used throughout
    (``random.sample`` being the exception).
    """
    # Seed the global RNG
    # np.random.seed(233)

    # Random floats in [0, 1) with the given shape.
    # `np.random.rand(4, 3)` yields the same kind of result as
    # `np.random.random((4, 3))`; rand takes the dimensions as separate
    # arguments, random takes a single size tuple.
    # print(np.random.rand(4, 3))

    # Random floats of the given shape drawn from the standard normal distribution
    # print(np.random.randn())    # a single float
    # print(np.random.randn(2, 4))

    # Random integers of shape `size` in [low, high), uniformly distributed; dtype defaults to int
    # print(np.random.randint(low=1, high=10, size=(3, 4), dtype='I'))

    # Random floats of shape `size` in [low, high), uniformly distributed
    # print(np.random.uniform(low=1.0, high=10.0, size=(3, 4)))

    # Draw `size` items from `a` and return them as an array.
    # `replace=False` means no item is drawn twice.
    # The optional `p` gives per-item probabilities (uniform when omitted);
    # if given it must have the same length as `a` and sum to 1.
    population = range(0, 10, 1)
    # Comparable to `random.sample(population, 5)` from the standard library
    choices = np.random.choice(a=population, size=5, replace=False)
    print(choices)

    # Shuffle a list in place
    tmp_list = list(range(10))
    # np.random.shuffle(tmp_list)
    # print(tmp_list)

    print('here')

异常处理

def func(x, y):
    """Template showing a try / except / finally chain around ``x / y``.

    Any exception raised by the division is caught and printed by the first
    matching handler (the generic ``Exception`` handler is the fallback);
    ``finally here`` is printed in every case. Always returns ``None``.
    """
    try:
        quotient = x / y  # result is not used; the division only serves as the guarded operation
        # ... other operations that might raise ...
    except IndexError as e:
        # A sequence index is out of range
        print(f"IndexError: {e}")
    except KeyError as e:
        # A dict key does not exist
        print(f"KeyError: {e}")
    except ValueError as e:
        # Right type but unsuitable value,
        # e.g. int('123sss') or math.sqrt(-1)
        print(f"ValueError: {e}")
    except OSError as e:
        # An I/O operation failed, e.g. a file is missing or cannot be opened
        print(f"OSError: {e}")
    except Exception as e:
        # Catch-all for everything not handled above
        print(f"Exception: {e}")
    finally:
        # Runs whether or not an exception occurred
        print("finally here")

for、zip、enumerate

# Three equally long lists that are walked in lockstep below.
list1 = [1, 2, 3]
list2 = ['a', 'b', 'c']
list3 = [True, False, True]

# zip yields one tuple per position, unpacked here into three loop variables.
for item1, item2, item3 in zip(list1, list2, list3):
    print(item1, item2, item3)

# enumerate yields (index, zipped_tuple) pairs; the parentheses around the
# three item names are required so the inner tuple is unpacked as well.
for idx, (item_1, item_2, item_3) in enumerate(zip(list1, list2, list3)):
    print(f"Index: {idx}, List1: {item_1}, List2: {item_2}, List3: {item_3}")