// Package importer walks a data directory and imports the files found there,
// record by record, through an ODBCImporter. Edge files are imported first
// and only cached via graph.CacheEdgeInfo; every other file is inserted with
// ODBCImporter.InsertData.
package importer

import (
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"git.wecise.com/wecise/cgimport/graph"
	"git.wecise.com/wecise/cgimport/odbc"
	"git.wecise.com/wecise/cgimport/reader"
	"git.wecise.com/wecise/cgimport/schema"
	"github.com/wecisecode/util/filewalker"
	"github.com/wecisecode/util/merrs"
	"github.com/wecisecode/util/rc"
)

var mcfg = odbc.Config
var logger = odbc.Logger

// Importer holds the configuration and the shared state of one import run.
type Importer struct {
	datapath         string                 // directory whose files are imported
	parallel         int                    // limit handed to the file-level routines controller
	rebuild          bool                   // drop existing classes before importing
	reload           bool                   // when true, previously saved progress is not loaded
	importstatus     *CGIStatus             // per-file progress, persisted via Save
	fileimportrc     *rc.RoutinesController // schedules one task per file
	odbcqueryrc      *rc.RoutinesController // schedules one task per record
	odbcimporter     *ODBCImporter          // performs the actual inserts
	starttime        time.Time              // now minus the use time accumulated by earlier runs
	currentstarttime time.Time              // start of the current run
}

// ImportDir builds an Importer for datapath and runs it.
//
// parallel bounds the file-level concurrency; the record-level concurrency
// is read from the "odbc.concurrent.limit" configuration key (default 100).
// The results are those of Importer.Import.
func ImportDir(datapath string, parallel int, rebuild, reload bool) (totalfilescount, totallinecount, totalrecordscount int64, totalusetime time.Duration, filescount, linescount, recordscount int64, usetime time.Duration, err error) {
	concurlimit := mcfg.GetInt("odbc.concurrent.limit", 100)
	importer := &Importer{
		datapath:     datapath,
		parallel:     parallel,
		rebuild:      rebuild,
		reload:       reload,
		importstatus: NewCGIStatus(),
		fileimportrc: rc.NewRoutinesController("", parallel),
		odbcqueryrc:  rc.NewRoutinesControllerLimit("", concurlimit, concurlimit*2),
		odbcimporter: NewODBCImporter(),
	}
	return importer.Import()
}

// Import runs the whole import: it optionally loads saved progress,
// optionally rebuilds the classes, revises the class structure, imports the
// edge files and then all remaining files.
//
// The total* results aggregate the persisted import status plus the edge
// files of this run (edge files are not recorded in the status); the
// remaining results describe the current run only.
func (importer *Importer) Import() (totalfilescount, totallinecount, totalrecordscount int64, totalusetime time.Duration, filescount, linescount, recordscount int64, usetime time.Duration, err error) {
	if odbc.DevPhase&odbc.DP_PROCESSCONTINUE != 0 && !importer.reload {
		// Continue a previous run: load the saved progress.
		err = importer.importstatus.Load()
		if err != nil {
			return
		}
		err = importer.odbcimporter.init()
		if err != nil {
			return
		}
	}
	if importer.rebuild {
		// Rebuild: remove the existing classes.
		err = importer.odbcimporter.rebuild()
		if err != nil {
			return
		}
	}
	// Create / revise the classes.
	err = importer.odbcimporter.ReviseClassStruct()
	if err != nil {
		return
	}
	totalusetime = importer.importstatus.TotalUseTime
	importer.starttime = time.Now().Add(-totalusetime)
	importer.currentstarttime = time.Now()

	reedgefile := regexp.MustCompile("(?i).*edge.*.csv")
	efc, elc, erc, eut, e := importer.ImportEdgeFiles(reedgefile, false)
	if e != nil {
		err = e
		return
	}
	afc, alc, arc, aut, e := importer.ImportNonEdgeFiles(reedgefile, true)
	if e != nil {
		err = e
		return
	}
	totalfilescount = int64(len(importer.importstatus.ImportStatus)) + efc
	for _, v := range importer.importstatus.ImportStatus {
		totallinecount += v.LinesCount
		totalrecordscount += v.RecordsCount
	}
	totallinecount += elc
	totalrecordscount += erc
	filescount = afc + efc
	linescount = alc + elc
	recordscount = arc + erc
	// Both phases measure from currentstarttime. Keep the larger value so the
	// edge phase is not lost when the non-edge phase imported no file.
	usetime = aut
	if usetime < eut {
		usetime = eut
	}
	totalusetime = importer.importstatus.TotalUseTime

	importer.importstatus.WaitSaveDone()
	importer.alldone()
	return
}

// ImportEdgeFiles imports only the files whose base name matches reedgefile.
func (importer *Importer) ImportEdgeFiles(reedgefile *regexp.Regexp, logstatus bool) (filescount, linecount, recordscount int64, usetime time.Duration, err error) {
	return importer.ImportFiles(func(basedir string, fpath string) FWOP {
		if !reedgefile.MatchString(filepath.Base(fpath)) {
			// Not an edge file: skip it.
			return FWOP_IGNORE
		}
		return FWOP_CONTINUE
	}, logstatus)
}

// ImportNonEdgeFiles imports only the files whose base name does not match
// reedgefile.
func (importer *Importer) ImportNonEdgeFiles(reedgefile *regexp.Regexp, logstatus bool) (filescount, linecount, recordscount int64, usetime time.Duration, err error) {
	return importer.ImportFiles(func(basedir string, fpath string) FWOP {
		if reedgefile.MatchString(filepath.Base(fpath)) {
			// Edge file: skip it.
			return FWOP_IGNORE
		}
		return FWOP_CONTINUE
	}, logstatus)
}

// FWOP is the decision a file filter returns for each file met while walking
// the data directory.
type FWOP int

const (
	// FWOP_IGNORE skips the current file and continues with the next one.
	FWOP_IGNORE FWOP = iota + 1
	// FWOP_BREAK stops the walk.
	FWOP_BREAK
	// FWOP_CONTINUE imports the current file.
	FWOP_CONTINUE
)

// ImportFiles walks the top level of the data directory and imports every
// file accepted by fwop, one task per file on the file-level routines
// controller.
//
// When logstatus is true the final line/record counts of each file are
// stored in the import status. linescount and recordscount count only what
// was imported by this call (progress loaded from the status is subtracted).
// err is the first error reported by a file task, unless the walk itself
// failed, in which case the walk error is returned.
func (importer *Importer) ImportFiles(fwop func(basedir string, fpath string) FWOP, logstatus bool) (filescount, linescount, recordscount int64, usetime time.Duration, err error) {
	var (
		wg sync.WaitGroup
		mu sync.Mutex // guards err and usetime, which the file tasks write
	)
	fail := func(e error) {
		mu.Lock()
		if err == nil {
			err = e
		}
		mu.Unlock()
	}
	failed := func() bool {
		mu.Lock()
		defer mu.Unlock()
		return err != nil
	}

	// Walk the data directory.
	fw, e := filewalker.NewFileWalker([]string{importer.datapath}, ".*")
	if e != nil {
		err = e
		return
	}
	e = fw.List(func(basedir string, fpath string) bool {
		if failed() {
			// An earlier file failed: stop the walk.
			return false
		}
		if strings.Contains(fpath, string(filepath.Separator)) {
			// Sub-directories are ignored. fw.List is ordered and lists
			// directories after files, so the walk can stop at the first one.
			return false
		}
		switch fwop(basedir, fpath) {
		case FWOP_IGNORE:
			// Skip this file, go on with the next one.
			return true
		case FWOP_BREAK:
			// Stop the walk.
			return false
		case FWOP_CONTINUE:
		default:
		}
		// Import the current file.
		filename := filepath.Join(basedir, fpath)
		filescount++
		wg.Add(1)
		// The slot is released exactly once, whether the task runs or the
		// scheduling call fails.
		var once sync.Once
		release := func() { once.Do(wg.Done) }
		// Run the import concurrently.
		cerr := importer.fileimportrc.ConcurCall(1,
			func() {
				defer release()
				importer.importstatus.mutex.RLock()
				prev := importer.importstatus.ImportStatus[filename]
				importer.importstatus.mutex.RUnlock()
				linefrom, blockfrom := int64(0), int64(0)
				if prev != nil {
					linefrom, blockfrom = prev.LinesCount, prev.RecordsCount
				}
				if linefrom == 0 {
					logger.Info("import", "file", filename)
				} else {
					logger.Info("import", "file", filename, "from line", linefrom)
				}
				lines, records, e := importer.ImportFile(filename, linefrom, blockfrom, logstatus)
				if e != nil {
					fail(e)
					return
				}
				atomic.AddInt64(&linescount, lines-linefrom)
				atomic.AddInt64(&recordscount, records-blockfrom)
				mu.Lock()
				usetime = time.Since(importer.currentstarttime)
				mu.Unlock()
				importer.importstatus.mutex.Lock()
				if logstatus {
					importer.importstatus.ImportStatus[filename] = &ImportStatus{
						LinesCount:   lines,
						RecordsCount: records}
				}
				importer.importstatus.TotalUseTime = time.Since(importer.starttime)
				importer.importstatus.mutex.Unlock()
				importer.importstatus.Save()
				logger.Info("file", filename, "imported", records, "records")
			},
		)
		if cerr != nil {
			// NOTE(review): assumes the task is not run when ConcurCall
			// reports an error - confirm against the rc package. The
			// once-guard keeps the WaitGroup balanced either way.
			release()
			fail(merrs.NewError(cerr, merrs.SSMaps{{"filename": filename}}))
			return false
		}
		return true
	})
	wg.Wait()
	if e != nil {
		if os.IsNotExist(e) {
			err = merrs.NewError(`directory "`+importer.datapath+`" not exist specified by "datapath"`, e)
		} else {
			err = merrs.NewError(e)
		}
		return
	}
	return
}

// ImportFile opens the named file and imports it with importReader, resuming
// after linefrom lines / blockfrom records. The parameter name shadows the
// path/filepath package inside this function.
func (importer *Importer) ImportFile(filepath string, linefrom, blockfrom int64, logstatus bool) (linecount, blockcount int64, err error) {
	f, e := os.Open(filepath)
	if e != nil {
		return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filepath}})
	}
	defer f.Close()
	return importer.importReader(filepath, f, linefrom, blockfrom, logstatus)
}

// importReader reads buf block by block and imports each block as one record
// on the record-level routines controller.
//
// The file type is derived from markers in filename ("_L1_" .. "_L8_",
// "MASTER", "EDGE"); the match runs on the name exactly as passed in.
// linefrom is handed to the block reader as the number of lines to skip and
// blockfrom seeds the record counter. When logstatus is true, progress is
// stored in the import status whenever the oldest in-flight record
// completes. linecount is the last line number reported by the reader,
// blockcount the number of records scheduled (including blockfrom), and err
// the first error encountered.
func (importer *Importer) importReader(filename string, buf io.Reader, linefrom, blockfrom int64, logstatus bool) (linecount, blockcount int64, err error) {
	var filetype schema.FileType
	switch {
	case strings.Contains(filename, "_L1_"):
		filetype = schema.FT_LEVEL1
	case strings.Contains(filename, "_L2_"):
		filetype = schema.FT_LEVEL2
	case strings.Contains(filename, "_L3_"):
		filetype = schema.FT_LEVEL3
	case strings.Contains(filename, "_L4_"):
		filetype = schema.FT_LEVEL4
	case strings.Contains(filename, "_L5_"):
		filetype = schema.FT_LEVEL5
	case strings.Contains(filename, "_L6_"):
		filetype = schema.FT_LEVEL6
	case strings.Contains(filename, "_L7_"):
		filetype = schema.FT_LEVEL7
	case strings.Contains(filename, "_L8_"):
		filetype = schema.FT_LEVEL8
	case strings.Contains(filename, "MASTER"):
		filetype = schema.FT_MASTER
	case strings.Contains(filename, "EDGE"):
		filetype = schema.FT_EDGE
	default:
		err = merrs.NewError("filename does not conform to the agreed format " + filename)
		return
	}
	br, e := reader.NewBlockReader(filename, filetype, buf)
	if e != nil {
		return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}})
	}
	skiplines := int(linefrom)
	blockcount = blockfrom

	var (
		wg          sync.WaitGroup
		mu          sync.Mutex        // guards err, doinglines and lastlogtime
		doinglines  []int64           // line numbers of the records in flight, in read order
		lastlogtime = time.Now()      // last time a progress line was logged
	)
	fail := func(e error) {
		mu.Lock()
		if err == nil {
			err = e
		}
		mu.Unlock()
	}
	failed := func() bool {
		mu.Lock()
		defer mu.Unlock()
		return err != nil
	}
	// markdone removes a finished record from doinglines. Progress is saved
	// only when the record was the oldest one in flight, so the stored line
	// number never runs ahead of a record that is still being imported.
	markdone := func(doingline, doingblock int64) {
		mu.Lock()
		defer mu.Unlock()
		if len(doinglines) > 0 && doinglines[0] == doingline {
			doinglines = doinglines[1:]
			importer.importstatus.mutex.Lock()
			importer.importstatus.ImportStatus[filename] = &ImportStatus{
				LinesCount:   doingline,
				RecordsCount: doingblock,
			}
			importer.importstatus.TotalUseTime = time.Since(importer.starttime)
			importer.importstatus.Save()
			if time.Since(lastlogtime) > 5*time.Second {
				logger.Info("file", filename, "imported", doingblock, "records")
				lastlogtime = time.Now()
			}
			importer.importstatus.mutex.Unlock()
			return
		}
		for i, l := range doinglines {
			if l == doingline {
				doinglines = append(doinglines[:i], doinglines[i+1:]...)
				break
			}
		}
	}

	// Deferred calls run in reverse order: wait for the in-flight records
	// first, then notify the ODBC importer.
	defer importer.done()
	defer wg.Wait()
	for !failed() {
		block, line, linenumber, e := br.ReadBlock(skiplines)
		linecount = int64(linenumber)
		if e != nil {
			fail(merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}}))
			return
		}
		if block == nil {
			// End of input.
			return
		}
		blockcount++
		wg.Add(1)
		doingline := linecount
		doingblock := blockcount
		if logstatus {
			mu.Lock()
			doinglines = append(doinglines, doingline)
			mu.Unlock()
		}
		// The slot is released exactly once, whether the task runs or the
		// scheduling call fails.
		var once sync.Once
		release := func() { once.Do(wg.Done) }
		cerr := importer.odbcqueryrc.ConcurCall(1, func() {
			defer release()
			// Use a task-local error; the reader loop owns its own variables.
			ierr := importer.importRecord(block, line, filename, filetype, int(doingline))
			if ierr != nil {
				fail(merrs.NewError(ierr, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(doingline)}, {"line": line}}))
				return
			}
			if logstatus {
				markdone(doingline, doingblock)
			}
		})
		if cerr != nil {
			// NOTE(review): assumes the task is not run when ConcurCall
			// reports an error - confirm against the rc package. Without the
			// release the deferred wg.Wait would never return.
			release()
			fail(merrs.NewError(cerr, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}}))
			return
		}
	}
	return
}

// importRecord imports a single record. Edge records are only cached through
// graph.CacheEdgeInfo; every other record is inserted into the class named
// after its file type. When odbc.LogDebug is set the record is logged as
// JSON first.
func (importer *Importer) importRecord(record map[string]any, line string, filename string, filetype schema.FileType, linecount int) (err error) {
	if odbc.LogDebug {
		bs, e := json.MarshalIndent(record, "", " ")
		if e != nil {
			return merrs.NewError(e)
		}
		logger.Debug(fmt.Sprint("import ", filename, "[", linecount, "]:", string(bs)))
	}
	switch filetype {
	case schema.FT_EDGE:
		graph.CacheEdgeInfo(record)
	default:
		classaliasname := string(filetype)
		err = importer.odbcimporter.InsertData(classaliasname, record)
		if err != nil {
			err = merrs.NewError(err, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}})
			return
		}
	}
	return
}

// alldone forwards the end of the whole import to the ODBC importer.
func (importer *Importer) alldone() {
	importer.odbcimporter.alldone()
}

// done forwards the end of one file to the ODBC importer.
func (importer *Importer) done() {
	importer.odbcimporter.done()
}

// Check runs two fixed diagnostic queries against odbc.ODBClient and prints
// their results as JSON on standard output. It does nothing when no client
// is configured and panics when a query fails.
func Check() {
	client := odbc.ODBClient
	if client == nil {
		return
	}
	{
		// mql := "select id,uniqueid,tags,contain,day,vtime from level1 where uniqueid='E2E:OTR0002L'"
		mql := "select * from level1 where uniqueid='E2E:OTR0002L'"
		r, e := client.Query(mql).Do()
		if e != nil {
			panic(merrs.NewError(e))
		}
		// Best-effort diagnostic output: a marshalling error only yields an
		// empty line.
		bs, _ := json.MarshalIndent(r.Data, "", " ")
		fmt.Println(string(bs))
	}
	{
		mql := `match ("level1:E2E:OTR0002L")-[*]->(),("level1:E2E:OTR0002L")<-[*]-() in "level1","level2"`
		r, e := client.Query(mql).Do()
		if e != nil {
			panic(merrs.NewError(e))
		}
		// Best-effort diagnostic output: a marshalling error only yields an
		// empty line.
		bs, _ := json.MarshalIndent(r.Data, "", " ")
		fmt.Println(string(bs))
	}
}