excelproc.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #!/opt/miniconda3/bin/python3
  2. import os
  3. import time
  4. import datetime
  5. import glob
  6. import re
  7. import json
  8. import pandas
  9. import xlrd
  10. import openpyxl as xl
  11. from openpyxl.worksheet.worksheet import Worksheet
  12. from openpyxl.cell import MergedCell
  13. def debug(*values: object, sep: str = None, end: str = None):
  14. # print(*values, sep=sep, end=end)
  15. return
  16. def info(*values: object, sep: str = None, end: str = None):
  17. print(*values, sep=sep, end=end)
  18. return
  19. # 将excel转换为csv
  20. # 指定sheet_name
  21. def excel2csv_1(sheet_name, excelfilename, csvfilename):
  22. wb = xlrd.open_workbook(excelfilename, logfile=open(os.devnull, 'w'))
  23. df = pandas.read_excel(wb, sheet_name=sheet_name)
  24. os.makedirs(os.path.dirname(csvfilename), exist_ok=True)
  25. df.to_csv(csvfilename)
  26. # 处理合并表格,复制相同内容
  27. def parse_merged_cell(sheet:Worksheet, row, col):
  28. cell = sheet.cell(row=row, column=col)
  29. if isinstance(cell, MergedCell):
  30. for merged_range in sheet.merged_cells.ranges:
  31. if cell.coordinate in merged_range:
  32. cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
  33. break
  34. return cell
  35. # 将excel转换为json,
  36. # 提取特定流量汇总数据
  37. def excel2json_1(excelfilename, jsonfilename):
  38. json_obj = {}
  39. workbook = xl.load_workbook(excelfilename, data_only=True)
  40. sheetname = "汇总数据"
  41. sheet = workbook[sheetname]
  42. json_obj["核心网A平面"] = json_sheet = {}
  43. for row_index in range(2,5,2):
  44. for col_index in range(1,10):
  45. key_cell = sheet.cell(row=row_index, column=col_index)
  46. val_cell = sheet.cell(row=row_index+1, column=col_index)
  47. if key_cell.value and val_cell:
  48. key = key_cell.value
  49. val = val_cell.value
  50. json_sheet[key] = val
  51. json_obj["核心网B平面"] = json_sheet = {}
  52. for row_index in range(8,11,2):
  53. for col_index in range(1,sheet.max_column):
  54. key_cell = sheet.cell(row=row_index, column=col_index)
  55. val_cell = sheet.cell(row=row_index+1, column=col_index)
  56. if key_cell.value and val_cell:
  57. key = key_cell.value
  58. val = val_cell.value
  59. json_sheet[key] = val
  60. for row_index in range(28,29,2):
  61. for col_index in range(1,sheet.max_column):
  62. key_cell = sheet.cell(row=row_index, column=col_index)
  63. val_cell = sheet.cell(row=row_index+1, column=col_index)
  64. if key_cell.value and val_cell:
  65. key = key_cell.value
  66. val = val_cell.value
  67. json_obj[key] = val
  68. for row_index in range(40,41,2):
  69. for col_index in range(1,sheet.max_column):
  70. key_cell = sheet.cell(row=row_index, column=col_index)
  71. val_cell = sheet.cell(row=row_index+1, column=col_index)
  72. if key_cell.value and val_cell:
  73. key = key_cell.value
  74. val = val_cell.value
  75. json_obj[key] = val
  76. json_str = json.dumps(json_obj, indent=4, ensure_ascii=False)
  77. debug(json_str)
  78. os.makedirs(os.path.dirname(jsonfilename), exist_ok=True)
  79. with open(jsonfilename, mode='w', encoding='utf8') as fd:
  80. fd.write(json_str)
  81. # 将excel转换为json,
  82. # 处理合并表格数据
  83. def excel2json_2(excelfilename, jsonfilename):
  84. json_obj = {}
  85. workbook = xl.load_workbook(excelfilename, data_only=True)
  86. for sheetname in workbook.sheetnames:
  87. sheet = workbook[sheetname]
  88. json_sheet = []
  89. debug(sheetname)
  90. cells = []
  91. for row_index in range(1,sheet.max_row+1):
  92. cols = [None]*(sheet.max_column+1)
  93. cells += [cols]
  94. allcolsisnone = True
  95. for col_index in range(1,sheet.max_column+1):
  96. cell = parse_merged_cell(sheet, row_index, col_index)
  97. if cell.value:
  98. allcolsisnone = False
  99. cols[col_index-1] = str(cell.value).replace("\n","") if isinstance(cell.value, str) else cell.value
  100. if not allcolsisnone:
  101. if row_index == 1:
  102. keys = cells[0]
  103. for i in range(len(keys)):
  104. val = cols[i]
  105. debug(val," \t",end="")
  106. debug()
  107. if row_index > 1:
  108. json_one = {}
  109. keys = cells[0]
  110. for i in range(len(keys)):
  111. key = keys[i]
  112. val = cols[i]
  113. debug(type(val).__name__+":"+str(val)," \t",end="")
  114. if val is not None:
  115. if key is None:
  116. key = 'col{}'.format(i+1)
  117. if isinstance(val, datetime.time):
  118. val = '{}'.format(val)
  119. json_one[key] = val
  120. json_sheet.append(json_one)
  121. debug()
  122. if len(json_sheet) > 0:
  123. json_obj[sheetname] = json_sheet
  124. json_str = json.dumps(json_obj, indent=4, ensure_ascii=False)
  125. debug(json_str)
  126. os.makedirs(os.path.dirname(jsonfilename), exist_ok=True)
  127. with open(jsonfilename, mode='w', encoding='utf8') as fd:
  128. fd.write(json_str)
  129. # 覆盖已经存在的结果文件
  130. overwrite = False
  131. def main():
  132. # 文件扫描
  133. files = glob.glob("report/recent/**", recursive=True)
  134. for f in files:
  135. if re.match('''.*/系统一部/SHVPSGSDB1.*.xls''', f):
  136. info('系统一部 Excel转CSV', f)
  137. csvfn = f.replace(".xls",".csv") .replace("report/recent/","report/files/")
  138. if overwrite or not os.path.exists(csvfn):
  139. excel2csv_1("历史性能",f,csvfn)
  140. if re.match('''.*/基础设施部/.*.xlsx''', f):
  141. info('基础设施部 Excel转JSON', f)
  142. jsonfn = f.replace(".xlsx",".json") .replace("report/recent/","report/files/")
  143. if overwrite or not os.path.exists(jsonfn):
  144. excel2json_2(f,jsonfn)
  145. if re.match('''.*/网络与平台部/关键设备CPU内存使用率统计日报报表.*.xls''', f):
  146. info('网络与平台部 Excel转CSV', f)
  147. tofn = f.replace(".xls",".csv") .replace("report/recent/","report/files/")
  148. if overwrite or not os.path.exists(tofn):
  149. excel2csv_1("Sheet1",f,tofn)
  150. if re.match('''.*/网络与平台部/支付系统流量日报.*.xls''', f):
  151. info('网络与平台部 Excel转JSON', f)
  152. tofn = f.replace(".xlsx",".json") .replace("report/recent/","report/files/")
  153. if overwrite or not os.path.exists(tofn):
  154. excel2json_1(f,tofn)
  155. # 自动清除,保留10天
  156. files = glob.glob("report/files/**", recursive=True)
  157. for f in files:
  158. if os.path.isfile(f) and os.path.getmtime(f) < time.time() - 3600 * 24 * 10:
  159. os.remove(f)
  160. # 清除空目录
  161. dir = os.path.dirname(f)
  162. while not os.listdir(dir):
  163. os.rmdir(dir)
  164. dir = os.path.dirname(dir)
  165. return
  166. if __name__ == '__main__':
  167. try:
  168. # 重定向当前目录
  169. os.chdir(os.path.dirname(os.path.abspath(__file__)))
  170. exit(main())
  171. except KeyboardInterrupt as ki:
  172. info("")
  173. exit(130)