docIndex.json

{
  "index": {
    "number_of_shards": "1",
    "number_of_replicas": "0",
    "max_result_window": 100000000
  },
  "analysis": {
    "analyzer": {
      "codeprefix_analyzer": {
        "tokenizer": "codeprefix_tokenizer",
        "filter": [
          "lowercase"
        ]
      },
      "codefull_analyzer": {
        "tokenizer": "codefull_tokenizer",
        "filter": [
          "lowercase"
        ]
      },
      "lowercase_analyzer": {
        "tokenizer": "standard",
        "filter": [
          "lowercase"
        ]
      },
      "number_analyzer": {
        "tokenizer": "number_tokenizer"
      },
      "number_ngram_analyzer": {
        "tokenizer": "number_ngram_tokenizer",
        "filter": [
          "lowercase"
        ]
      },
      "number2_analyzer": {
        "tokenizer": "standard",
        "filter": [
          "lowercase",
          "number_filter"
        ]
      },
      "letter_analyzer": {
        "tokenizer": "letter2_tokenizer",
        "filter": [
          "lowercase"
        ]
      },
      "letter2_analyzer": {
        "tokenizer": "letter_tokenizer",
        "filter": [
          "lowercase"
        ]
      },
      "keywordignorespace_analyzer": {
        "tokenizer": "keyword",
        "char_filter": [
          "replace_whitespace"
        ]
      }
    },
    "tokenizer": {
      "codeprefix_tokenizer": {
        "type": "edge_ngram",
        "min_gram": 2,
        "max_gram": 50,
        "token_chars": [
          "letter",
          "digit"
        ]
      },
      "codefull_tokenizer": {
        "type": "ngram",
        "min_gram": 2,
        "max_gram": 50
      },
      "number_tokenizer": {
        "type": "pattern",
        "pattern": "\\D+"
      },
      "number_ngram_tokenizer": {
        "type": "ngram",
        "min_gram": 2,
        "max_gram": 50,
        "token_chars": [
          "digit"
        ]
      },
      "letter_tokenizer": {
        "type": "pattern",
        "pattern": "\\W+"
      },
      "letter2_tokenizer": {
        "type": "pattern",
        "pattern": "[^a-zA-Z]"
      }
    },
    "filter": {
      "number_filter": {
        "type": "pattern_capture",
        "preserve_original": true,
        "patterns": [
          "(\\d+)"
        ]
      },
      "ascii_filter": {
        "type": "asciifolding",
        "preserve_original": true
      }
    },
    "char_filter": {
      "replace_whitespace": {
        "type": "mapping",
        "mappings": [
          "\\u0020=>"
        ]
      }
    }
  }
}
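
These appear to be Elasticsearch index settings: a single shard with no replicas, a greatly raised max_result_window, and custom analyzers built from edge_ngram, ngram, and pattern tokenizers for prefix, substring, digit-only, and letter-only matching. Below is a minimal sketch of applying the file with the official Python client; the host URL, index name, and client version (8.x) are assumptions, not part of the original file. One caveat: on Elasticsearch 7+, the two ngram tokenizers (min_gram 2, max_gram 50) exceed the default index.max_ngram_diff of 1 and would make index creation fail, so the sketch raises that limit first.

# A minimal sketch, assuming Elasticsearch 8.x and the official Python
# client (pip install elasticsearch). The index name "doc_index", the
# host URL, and the sample text are placeholders, not from the original.
import json

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

with open("docIndex.json") as f:
    settings = json.load(f)

# Elasticsearch 7+ enforces index.max_ngram_diff (default 1); the ngram
# tokenizers above span 50 - 2 = 48, so the limit must cover that gap.
settings["index"]["max_ngram_diff"] = 48

es.indices.create(index="doc_index", settings=settings)

# Quick check: see how one of the custom analyzers tokenizes a sample code.
resp = es.indices.analyze(
    index="doc_index",
    analyzer="codeprefix_analyzer",
    text="DOC-2024-001",
)
print([t["token"] for t in resp["tokens"]])

On a 6.x or earlier cluster the max_ngram_diff line can be dropped, since the limit is only enforced from 7.0 on. Note also that max_result_window of 100000000 lifts the default cap of 10000 results per query; that is convenient for deep pagination or exports, but very deep from/size queries are memory-hungry, so this is a deliberate trade-off rather than a safe default.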