# Python data-processing code notes -- for personal use, a few utility snippets.
#
# NOTE: the sections below are independent fragments. Placeholder names such
# as excel_path, data_dir, data, val1, val2 and longtext are not defined here
# and must be supplied before a fragment is run.

import json
import os
import re

import pandas as pd


# --- Read Excel with pandas -------------------------------------------------
# read_excel already returns a DataFrame, so no extra pd.DataFrame() wrapper.
data = pd.read_excel(excel_path)
for row in data.itertuples():
    uid = getattr(row, 'uid')  # named uid so the builtin id() is not shadowed
    text = getattr(row, '文本')
    if pd.isna(text):
        # Skip rows whose text cell is empty.
        continue
    info = re.findall(r'"重要":"([^"]+)",', str(text))


# --- Write Excel with pandas ------------------------------------------------
def output_excel(outputdata, result_path):
    """Write outputdata to sheet 'Sheet1' of the workbook at result_path.

    Args:
        outputdata: Row data accepted by pd.DataFrame; each row must match
            the three fixed column titles.
        result_path: Destination path of the Excel file.
    """
    title = ['姓名', '性别', '年龄']
    df = pd.DataFrame(outputdata, columns=title)
    # The context manager saves and closes the workbook on exit;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(result_path) as writer:
        df.to_excel(writer, sheet_name='Sheet1', index=False)


# --- Read JSON --------------------------------------------------------------
files = os.listdir(data_dir)
for jsonfile in files:
    # os.path.join works whether or not data_dir ends with a separator, and
    # the with-block closes each file handle instead of leaking it.
    with open(os.path.join(data_dir, jsonfile), 'r', encoding='utf-8') as fh:
        json_data = json.load(fh)


# --- Write JSON -------------------------------------------------------------
def output_json(outputdata, result_path):
    """Dump outputdata to result_path as indented, non-ASCII-escaped JSON.

    Args:
        outputdata: Any JSON-serializable object.
        result_path: Destination path; the file is created or truncated.
    """
    with open(result_path, 'w', encoding='utf-8') as file:
        json.dump(outputdata, file, indent=4, ensure_ascii=False)


# --- Read Excel with xlrd ---------------------------------------------------
import xlrd

# NOTE(review): xlrd 2.x only reads legacy .xls files; opening an .xlsx file
# needs xlrd < 2.0 (or another reader such as pandas/openpyxl) -- confirm the
# installed version.
workbook = xlrd.open_workbook('test.xlsx')
sheet = workbook.sheets()[0]  # first worksheet
rows = sheet.nrows
cols = sheet.ncols
for i in range(rows):
    val = sheet.cell_value(i, 0)  # value in the first column of row i


# --- Dictionaries -----------------------------------------------------------
# Dict of lists: one list of [val1, val2] pairs per key.
User = dict()
User[uid] = []
User[uid].append([val1, val2])

# Dict of sets built from a tab-separated text file (here `data` is an open
# file object): first column is the key, second column goes into its set.
userdic = dict()
for line in data:
    linestr = line.strip().split('\t')
    tmp1 = int(linestr[0])
    tmp2 = int(linestr[1])
    userdic.setdefault(tmp1, set()).add(tmp2)


# --- Convert Word documents (.doc -> .docx) via COM --------------------------
from win32com import client as wc

word = wc.Dispatch("Word.Application")
# NOTE(review): file names are listed from ./result_new_2/ but opened from
# C:\docdir\ -- confirm both refer to the same set of files.
bookList = os.listdir(r'./result_new_2/')
try:
    for file in bookList:
        try:
            doc = word.Documents.Open("C:\\docdir\\" + file)
            # Save with an extra "x" appended to the name (".doc" -> ".docx");
            # format code 12 selects the docx file format.
            doc.SaveAs("{}x".format("C:\\docxdir\\" + file), 12)
            doc.Close()
        except Exception as e:
            # Best effort: report the failing file and its error, then go on.
            print(file, e)
            continue
finally:
    # Always shut down the Word process, even if the loop is interrupted.
    word.Quit()
print("完成!")


# --- re ---------------------------------------------------------------------
text = re.findall(r'"type":"([^"]+)",', str(longtext))

# Earlier notes: personal pandas notes.